Libraries
library(RColorBrewer)
set1 <- brewer.pal(8, "Set1")
Loading
clinicalData <- read.table("../data/Ulm1.5_s_Clinical.txt", sep = "\t", header = TRUE)
clinicalData <- clinicalData[order(clinicalData$PDID), ]
clinicalData$ERDate <- as.Date(as.character(clinicalData$ERDate), "%d-%b-%y")
clinicalData$CR_date <- as.Date(as.character(clinicalData$CR_date), "%d-%b-%y")
clinicalData$TPL_date <- as.Date(as.character(clinicalData$TPL_date), "%d-%b-%y")
clinicalData$Date_LF <- as.Date(as.character(clinicalData$Date_LF), "%d-%b-%y")
clinicalData$Recurrence_date <- as.Date(as.character(clinicalData$Recurrence_date), "%d-%b-%y")
levels(clinicalData$Study) <- c("tr98A", "tr98B", "tr0704")
clinicalData$VPA[is.na(clinicalData$VPA)] <- 0
clinicalData$ATRA_arm[is.na(clinicalData$ATRA_arm)] <- 0
colnames(clinicalData) <- gsub("\\.", "", colnames(clinicalData))
mutationData = read.table("../data/Ulm1.5_s_Genetic.txt", sep = "\t", header = TRUE)
mutationData$SAMPLE_NAME <- factor(as.character(mutationData$SAMPLE_NAME), levels = levels(clinicalData$PDID)) ## Refactor
oncogenics <- (table(mutationData[mutationData$Result %in% c("ONCOGENIC", "POSSIBLE"), c("SAMPLE_NAME", "GENE")]) > 0) +
0
Compute gene-gene interactions
interactions <- sapply(1:ncol(oncogenics), function(i) sapply(1:ncol(oncogenics), function(j) {
f <- try(fisher.test(oncogenics[, i], oncogenics[, j]), silent = TRUE)
if (class(f) == "try-error")
0 else ifelse(f$estimate > 1, -log10(f$p.val), log10(f$p.val))
}))
odds <- sapply(1:ncol(oncogenics), function(i) sapply(1:ncol(oncogenics), function(j) {
f <- try(fisher.test(oncogenics[, i] + 0.5, oncogenics[, j] + 0.5), silent = TRUE)
if (class(f) == "try-error")
f = NA else f$estimate
}))
diag(interactions) <- 0
diag(odds) <- 1
colnames(odds) <- rownames(odds) <- colnames(interactions) <- rownames(interactions) <- colnames(oncogenics)
odds[10^-abs(interactions) > 0.05] = 1
odds[odds < 0.001] = 1e-04
odds[odds > 1000] = 10000
logodds = log10(odds)
Heatmap
# pdf(paste(Sys.Date(),'-Interactions.pdf', sep=''), 4,4, pointsize = 8) ## HEATMAP OF ALL
par(bty = "n", mgp = c(2, 0.5, 0), mar = c(3, 3, 2, 2) + 0.1, las = 2, tcl = -0.33)
ix = TRUE #colnames(interactions) %in% colnames(all_genotypes)
h = hclust(dist(interactions[ix, ix]))
o = h$order #c(h$order,(length(h$order) +1):ncol(interactions))
# image(x=1:ncol(interactions), y=1:nrow(interactions), interactions[o,o], col=brewer.pal(11,'BrBG'), breaks = c(-100,
# seq(-9,9,2), 100), xaxt='n', yaxt='n', xlab='',ylab='', xlim=c(0, ncol(interactions)+3), ylim=c(0,
# ncol(interactions)+3))
image(x = 1:ncol(interactions), y = 1:nrow(interactions), log10(odds[o, o]), col = brewer.pal(9, "BrBG"), breaks = c(-4:0 -
.Machine$double.eps, 0:4), xaxt = "n", yaxt = "n", xlab = "", ylab = "", xlim = c(0, ncol(interactions) + 3), ylim = c(0,
ncol(interactions) + 3))
mtext(side = 1, at = 1:ncol(interactions), colnames(interactions)[o], cex = 0.5, font = 3)
mtext(side = 2, at = 1:ncol(interactions), colnames(interactions)[o], cex = 0.5, font = 3)
abline(h = length(h$order) + 0.5, col = "white", lwd = 1)
abline(v = length(h$order) + 0.5, col = "white", lwd = 1)
abline(h = 0:ncol(interactions) + 0.5, col = "white", lwd = 0.5)
abline(v = 0:ncol(interactions) + 0.5, col = "white", lwd = 0.5)
w = arrayInd(which(p.adjust(10^-abs(interactions[o, o]), method = "BH") < 0.1), rep(nrow(interactions), 2))
points(w, pch = ".", col = "white")
w = arrayInd(which(p.adjust(10^-abs(interactions[o, o])) < 0.05), rep(nrow(interactions), 2))
points(w, pch = "*", col = "white")
image(y = 1:8, x = rep(ncol(interactions), 2) + c(2, 3), z = matrix(c(1:8), nrow = 1), col = brewer.pal(8, "BrBG"), add = TRUE)
# axis(side = 4, at = 8:12 + .5, cex.axis=.5, tcl=-.15, label=10^-abs(seq(1,9,2)), las=1, lwd=.5) axis(side = 4, at =
# 1:5 + .5, cex.axis=.5, tcl=-.15, label=10^-abs(seq(-9,-1,2)), las=1, lwd=.5)
axis(side = 4, at = seq(1, 7) + 0.5, cex.axis = 0.5, tcl = -0.15, label = 10^seq(-3, 3), las = 1, lwd = 0.5)
# lines(rep(ncol(interactions),2)+c(1,4), c(6,6)+.5, col='white') mtext(side=4, at=-1, 'P odds < 1', cex=.5, line=-.5)
# mtext(side=4, at=15, 'P odds > 1', cex=.5, line=-.5)
points(x = rep(ncol(interactions), 2) + 2.5, y = 10:11, pch = c("*", "."))
image(x = rep(ncol(interactions), 2) + c(2, 3), y = (12:13) - 0.5, z = matrix(1), col = brewer.pal(3, "BrBG"), add = TRUE)
mtext(side = 4, at = 12:10, c("P > 0.05", "BH < 0.1", "Bf. < 0.05"), cex = 0.5, line = 0.2)
# dev.off()
library(survival)
## Loading required package: splines
all(rownames(oncogenics) == clinicalData$PDID)
## [1] TRUE
survival <- Surv(clinicalData$efs, clinicalData$EFSSTAT)
# survival <- Surv(clinicalData$OS, clinicalData$Status)
coxModel <- coxph(survival ~ gender + AOD + Study, data = clinicalData) # Most basic
phTest <- cox.zph(coxModel)
phTest
## rho chisq p
## gender -0.0144 0.235 0.6279
## AOD 0.0318 1.252 0.2632
## Studytr98B 0.0361 1.536 0.2152
## Studytr0704 0.0591 4.000 0.0455
## GLOBAL NA 8.443 0.0766
par(mfrow = c(1, 4), bty = "n")
for (i in 1:4) plot(phTest[i])
source("~/Git/Projects/CoxHD/CoxHD/R/ecoxph.R")
makeInteractions <- function(X, Y) {
do.call(cbind, lapply(X, `*`, Y))
}
makeInteger <- function(F) {
res <- as.data.frame(lapply(levels(F), `==`, F))
colnames(res) <- levels(F)
res + 0
}
trialArms <- makeInteger(clinicalData$Study)
dataList <- list(Genetics = data.frame(oncogenics[, colSums(oncogenics) > 0]), Cytogenetics = clinicalData[, 46:68], Treatment = cbind(trialArms[,
2:3], ATRA = clinicalData$ATRA_arm, VPA = clinicalData$VPA, TPL = clinicalData$Time_Diag_TPL < survival[, 1] & !is.na(clinicalData$Time_Diag_TPL)),
Clinical = cbind(clinicalData[, c("AOD", "gender", "Performance_ECOG", "BM_Blasts", "PB_Blasts", "wbc", "LDH_", "HB",
"platelet")], makeInteger(clinicalData$TypeAML)[, -1])) #,
# MolRisk = makeInteger(clinicalData$M_Risk))
dataList$Genetics$CEBPA <- clinicalData$CEBPA > 0
dataList$Genetics$FLT3 <- NULL
dataList$Genetics$FLT3_ITD <- clinicalData$ITD > 0
dataList$Genetics$FLT3_TKD <- clinicalData$TKD > 0
dataList$Genetics$IDH2_p172 <- clinicalData$IDH2 == "172"
dataList$Genetics$NPM1 <- clinicalData$NPM1
dataList$Genetics = dataList$Genetics + 0
dataList$GeneGene <- makeInteractions(data.frame(dataList$Genetics), data.frame(dataList$Genetics))[, as.vector(upper.tri(matrix(0,
ncol = ncol(dataList$Genetics), nrow = ncol(dataList$Genetics))))]
dataList$GeneGene <- dataList$GeneGene[, colSums(dataList$GeneGene, na.rm = TRUE) > 0]
dataList$GeneCyto <- makeInteractions(dataList$Genetics, dataList$Cytogenetics)
dataList$GeneCyto <- dataList$GeneCyto[, colSums(dataList$GeneCyto, na.rm = TRUE) > 0]
dataList$GeneTreat <- makeInteractions(dataList$Genetics, dataList$Treatment)
dataList$GeneTreat <- dataList$GeneTreat[, colSums(dataList$GeneTreat, na.rm = TRUE) > 0]
dataList$CytoTreat <- makeInteractions(dataList$Cytogenetics, dataList$Treatment)
dataList$CytoTreat <- dataList$CytoTreat[, colSums(dataList$CytoTreat, na.rm = TRUE) > 0]
dataFrame <- do.call(cbind, dataList)
dataFrame <- StandardizeMagnitude(dataFrame)
groups <- factor(unlist(sapply(names(dataList), function(x) rep(x, ncol(dataList[[x]])))))
## Poor man's imputation
poorMansImpute <- function(x) {
x[is.na(x)] <- mean(x, na.rm = TRUE)
return(x)
}
dataFrame <- as.data.frame(sapply(dataFrame, poorMansImpute))
## Pre-select based on univariate test
univP <- sapply(dataFrame, function(x) {
summary(coxph(survival ~ x))$logtest[3]
})
# whichRFX <- which(p.adjust(univP,'BH') < 0.1 | groups %in% c('Genetics','Treatment','Clinical','Cytogenetics'))
whichRFX <- which(colSums(dataFrame) > 5 | groups %in% c("Genetics", "Treatment", "Clinical", "Cytogenetics"))
## Fit Cox model
coxRFXFit <- CoxRFX(dataFrame[, whichRFX], survival, groups = groups[whichRFX], sigma0 = 0.1)
# coxRFXFit <- CoxRFX(dataFrame[,whichRFX], survival, groups=groups[whichRFX], sigma0=0.01) ## reassuring that sigma
# doesn't shrink further
Coefficients
par(mar = c(5, 7, 1, 1))
boxplot(coxRFXFit$coefficients ~ groups[whichRFX], las = 2, border = set1, horizontal = TRUE)
abline(v = 0)
Construct a time-dependent Surv() object by splitting TPL patients into pre and post
t <- clinicalData$Time_Diag_TPL
t[is.na(t)] <- Inf
e <- clinicalData$efs
tplIndex <- t < e
survivalTD <- Surv(time = rep(0, nrow(clinicalData)), time2 = pmin(e, t), event = ifelse(tplIndex, 0, clinicalData$EFSSTAT))
survivalTD <- rbind(survivalTD, Surv(time = t[which(tplIndex)], time2 = e[which(tplIndex)], event = clinicalData$EFSSTAT[which(tplIndex)]))
survivalTD = Surv(survivalTD[, 1], survivalTD[, 2], survivalTD[, 3])
rm(e, t)
tplSplit <- c(1:nrow(clinicalData), which(tplIndex))
Data frame
dataFrameTD <- dataFrame[tplSplit, ]
dataFrameTD[which(tplIndex), grep("TPL", colnames(dataFrameTD), value = TRUE)] <- 0 ## Set pre-tpl variables to zero
patientTD <- c(clinicalData$PDID, clinicalData$PDID[which(tplIndex)])
Fit TD Cox RFX model
# whichRFXTD <- which(p.adjust(univP,'BH') < 0.1 | groups %in% c('Genetics','Treatment','Clinical','Cytogenetics'))
whichRFXTD <- which(colSums(dataFrame) > 5 | groups %in% c("Genetics", "Treatment", "Clinical", "Cytogenetics"))
coxRFXFitTD <- CoxRFX(dataFrameTD[, whichRFXTD], survivalTD, groups[whichRFXTD], sigma0 = 0.1)
par(mar = c(5, 7, 1, 1))
boxplot(coef(coxRFXFitTD)/log(2) ~ factor(coxRFXFitTD$groups, levels = levels(groups)[order(coxRFXFitTD$mu)]), col = set1,
horizontal = TRUE, las = 2)
abline(v = 0)
plot(coef(coxRFXFit), coef(coxRFXFitTD), col = set1[groups[whichRFXTD]]) # Note the sign change for TPL..
abline(0, 1)
for (g in levels(groups)) {
par(mar = c(7, 4, 2, 0))
x <- sort(coxRFXFitTD$coefficients[groups[whichRFXTD] == g])
v <- diag(coxRFXFitTD$var)[groups[whichRFXTD] == g][order(coxRFXFitTD$coefficients[groups[whichRFXTD] == g])]
plot(x, las = 2, pch = 16, xaxt = "n", main = g, cex = 0.5 + pmin(3, -log10(pchisq((x - coxRFXFitTD$mu[g])^2/v, 1, lower.tail = FALSE))))
segments(1:length(x), x - 2 * sqrt(v), 1:length(x), x + 2 * sqrt(v))
axis(side = 1, at = 1:length(x), labels = names(x), las = 2, cex.axis = 0.6)
abline(v = 1:length(x), lty = 3, col = "grey")
abline(h = coxRFXFitTD$mu[g])
abline(h = coxRFXFitTD$mu[g] + c(-1, 1) * sqrt(coxRFXFitTD$sigma2[g]), lty = 2)
}
partRiskTD <- PartialRisk(coxRFXFitTD)
# varianceComponents <- rowSums(cov(partRiskTD, use='complete'))
varianceComponents <- diag(cov(partRiskTD, use = "complete"))
varianceComponents
## Clinical CytoTreat Cytogenetics GeneCyto GeneGene GeneTreat Genetics Treatment
## 0.06510 0.01189 0.24847 0.01847 0.05912 0.02385 0.22053 0.07738
pie(abs(varianceComponents), col = brewer.pal(8, "Set1"))
title("Risk contributions")
library(HilbertVis)
## Loading required package: grid
## Loading required package: lattice
nStars <- 32
locations <- 1.5 * hilbertCurve(log2(nStars)) #2*expand.grid(1:nStars,1:nStars)
s <- sample(nrow(partRiskTD), nStars^2) #1:(nStars^2)
h <- hclust(dist(partRiskTD[s, ]))
# stars(exp((partRisk[s,][h$order,] - rep(colMeans(partRisk), each=nStars^2)))/2, scale=FALSE, locations=locations,
# key.loc=c(0,-3), col.lines=rep(1,(nStars^2)), col.stars = brewer.pal(11,'RdBu')[cut(survivalTD[s,2][h$order],
# quantile(survivalTD[,2], seq(0,1,0.1), na.rm=TRUE))])
x <- partRiskTD - rep(colMeans(partRiskTD), each = nrow(partRiskTD))
x <- x/(2 * sd(x)) + 1
# x <- exp(x)
stars(x[s, ][h$order, ]/2, scale = FALSE, locations = locations, key.loc = c(0, -3), col.lines = rep(1, (nStars^2)), col.stars = (brewer.pal(11,
"RdBu"))[cut(survivalTD[s, 2][h$order], quantile(survivalTD[, 2], seq(0, 1, 0.1), na.rm = TRUE))])
symbols(locations[, 1], locations[, 2], circles = rep(0.5, (nStars^2)), inches = FALSE, fg = "grey", add = TRUE, lty = 1)
Harrel's C
# library(Hmisc)
totalRiskTD <- rowSums(partRiskTD)
survConcordance(survivalTD ~ totalRiskTD)
## Call:
## survConcordance(formula = survivalTD ~ totalRiskTD)
##
## n=1879 (31 observations deleted due to missingness)
## Concordance= 0.741 se= 0.009189
## concordant discordant tied.risk tied.time std(c-d)
## 803952 280999 0 3540 19939
predictiveRiskTD <- rowSums(partRiskTD[, -which(colnames(partRiskTD) %in% c("Treatment", "GeneTreat", "CytoTreat"))])
survConcordance(survivalTD ~ predictiveRiskTD)
## Call:
## survConcordance(formula = survivalTD ~ predictiveRiskTD)
##
## n=1879 (31 observations deleted due to missingness)
## Concordance= 0.7244 se= 0.009189
## concordant discordant tied.risk tied.time std(c-d)
## 785966 298985 0 3540 19939
AUC
library(survAUC)
s <- survival
plot(AUC.uno(s[!is.na(s)], s[!is.na(s)], predictiveRiskTD[!is.na(s)], seq(0, 5000, 100)))
plot(AUC.uno(s[!is.na(s)], s[!is.na(s)], totalRiskTD[!is.na(s)], seq(0, 5000, 100)), add = TRUE, col = "black")
legend("bottomright", c("w/o treatment", "w treatment"), col = 2:1, lty = 1)
Risk quartiles
predictiveRiskGroups <- cut(predictiveRiskTD[!is.na(predictiveRiskTD)], quantile(predictiveRiskTD, seq(0, 1, 0.25)), labels = c("very low",
"low", "high", "very high"))
survConcordance(survivalTD ~ as.numeric(predictiveRiskGroups))
## Call:
## survConcordance(formula = survivalTD ~ as.numeric(predictiveRiskGroups))
##
## n=1878 (32 observations deleted due to missingness)
## Concordance= 0.7059 se= 0.008825
## concordant discordant tied.risk tied.time std(c-d)
## 636570 190182 257083 3540 19131
plot(survfit(survivalTD[!is.na(predictiveRiskTD)] ~ predictiveRiskGroups), col = brewer.pal(4, "Spectral")[4:1], mark = 16)
table(clinicalData$M_Risk, predictiveRiskGroups[1:nrow(clinicalData)])[c(2, 3, 4, 1), ]
##
## very low low high very high
## Favorable 308 131 33 3
## inter-1 19 104 174 121
## inter-2 36 65 85 84
## Adverse 4 23 44 185
Risk Plots
par(mfrow = c(4, 1))
for (l in levels(clinicalData$M_Risk)[c(2, 3, 4, 1)]) barplot(sapply(split(as.data.frame(partRiskTD[1:nrow(clinicalData),
][clinicalData$M_Risk == l, ]), predictiveRiskGroups[1:nrow(clinicalData)][clinicalData$M_Risk == l]), colMeans), beside = TRUE,
legend = TRUE, col = set1[1:8], main = l, xlim = c(1, 45))
Partial values of Harrel's C
c <- PartialC(coxRFXFitTD)
b <- barplot(c[1, ], col = set1, las = 2)
segments(b, c[1, ] - c[2, ], b, c[1, ] + c[2, ])
abline(h = 0.5)
set.seed(42)
testIx <- sample(c(TRUE, FALSE), nrow(dataFrame), replace = TRUE, prob = c(0.66, 0.34))
testIxTD <- testIx[tplSplit]
Pre-select based on univariate test
p <- sapply(dataFrame, function(x) {
summary(coxph(survivalTD[testIxTD] ~ x[testIxTD]))$logtest[3]
})
whichTrain <- whichRFXTD
Fit Cox model
coxRFXFitTDTrain <- CoxRFX(dataFrameTD[testIxTD, whichTrain], survivalTD[testIxTD], groups = groups[whichTrain], sigma0 = 0.1)
Partial contributions
partialRiskTest <- PartialRisk(coxRFXFitTDTrain, newX = dataFrameTD[!testIxTD, whichTrain])
c <- PartialC(coxRFXFitTDTrain, newX = dataFrameTD[!testIxTD, whichTrain], newSurv = survivalTD[!testIxTD])
b <- barplot(c[1, ], col = set1, las = 2)
segments(b, c[1, ] - c[2, ], b, c[1, ] + c[2, ])
abline(h = 0.5)
Overall
totalRiskTest <- rowSums(partialRiskTest)
survConcordance(survivalTD[!testIxTD] ~ totalRiskTest)
## Call:
## survConcordance(formula = survivalTD[!testIxTD] ~ totalRiskTest)
##
## n=619 (11 observations deleted due to missingness)
## Concordance= 0.6821 se= 0.0158
## concordant discordant tied.risk tied.time std(c-d)
## 82741 38561 0 496 3834
Compared to molecular risk
predictiveRiskTest <- rowSums(partialRiskTest[, -which(colnames(partRiskTD) %in% c("Treatment", "GeneTreat", "CytoTreat"))])
survConcordance(survivalTD[!testIxTD] ~ predictiveRiskTest)
## Call:
## survConcordance(formula = survivalTD[!testIxTD] ~ predictiveRiskTest)
##
## n=619 (11 observations deleted due to missingness)
## Concordance= 0.6779 se= 0.0158
## concordant discordant tied.risk tied.time std(c-d)
## 82228 39074 0 496 3834
# barplot(c(CGP=survConcordance(survivalTD[!testIxTD] ~ predictiveRiskTest)$concordance, MolecularRisk =
# survConcordance(survivalTD[!testIxTD] ~ c(Favourable=1, Adverse=4, `inter-1`=2,
# `inter-2`=3)[clinicalData$M_Risk[tplSplit][!testIxTD]])[[1]]))
s <- survival[!testIx]
plot(AUC.uno(s[!is.na(s)], s[!is.na(s)], predictiveRiskTest[!is.na(s)], seq(0, 5000, 100)))
plot(AUC.uno(s[!is.na(s)], s[!is.na(s)], totalRiskTest[!is.na(s)], seq(0, 5000, 100)), add = TRUE, col = "black")
legend("bottomright", c("w/o treatment", "w treatment"), col = 2:1, lty = 1)
library(rpart)
r <- rpart(survival ~ ., data = dataFrame)
plot(r)
text(r)
survConcordance(na.omit(survival) ~ predict(r))
## Call:
## survConcordance(formula = na.omit(survival) ~ predict(r))
##
## n= 1548
## Concordance= 0.7177 se= 0.008913
## concordant discordant tied.risk tied.time std(c-d)
## 662681 190230 232040 3540 19340
library(parallel)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 1.9-3
set.seed(42)
source("/Users/mg14/Git/Projects/CoxHD/CoxHD/R/stacoxph.R")
source("/Users/mg14/Git/Projects/CoxHD/CoxHD/R/r_concave_tail.R")
Fit model
stabCox <- StabCox(dataFrame[!is.na(survival), ], survival[!is.na(survival)], bootstrap.samples = 50, control = "BH", level = 0.1)
## ..................................................
plot(stabCox)
selected <- which(stabCox$Pi > 0.8)
selected
## Genetics.CEBPA Genetics.NPM1 Genetics.NRAS
## 8 33 34
## Genetics.SF3B1 Genetics.SFRS2 Genetics.TP53
## 44 45 49
## Genetics.FLT3_ITD Cytogenetics.inv3_t3_3 Cytogenetics.minus5_5q
## 54 59 61
## Cytogenetics.minus7_7q Cytogenetics.mono17_17p_abn17p Cytogenetics.plus21
## 62 67 70
## Cytogenetics.t_15_17 Cytogenetics.t_8_21 Cytogenetics.inv16_t16_16
## 73 74 75
## Cytogenetics.abn3q_other Treatment.VPA Treatment.TPL
## 77 83 84
## Clinical.AOD Clinical.gender Clinical.PB_Blasts
## 85 86 89
## Clinical.wbc GeneGene.IDH2.DNMT3A GeneGene.FLT3_ITD.DNMT3A
## 90 156 652
## GeneGene.FLT3_TKD.NPM1 GeneTreat.IDH2.TPL GeneTreat.NPM1.tr0704
## 708 1341 1391
## GeneTreat.NPM1.TPL GeneTreat.NRAS.tr0704 GeneTreat.SFRS2.tr0704
## 1394 1396 1438
## CytoTreat.inv16_t16_16.tr0704
## 1572
coxFit <- coxph(survival[testIx] ~ ., data = dataFrame[testIx, ][, names(selected)])
summary(coxFit)
## Call:
## coxph(formula = survival[testIx] ~ ., data = dataFrame[testIx,
## ][, names(selected)])
##
## n= 1036, number of events= 745
## (20 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Genetics.CEBPA -0.850 0.427 0.166 -5.12 3.1e-07 ***
## Genetics.NPM1 -0.829 0.437 0.135 -6.13 8.8e-10 ***
## Genetics.NRAS 0.222 1.249 0.139 1.61 0.10829
## Genetics.SF3B1 0.267 1.306 0.234 1.14 0.25415
## Genetics.SFRS2 0.613 1.846 0.199 3.09 0.00201 **
## Genetics.TP53 0.210 1.233 0.209 1.00 0.31532
## Genetics.FLT3_ITD 0.419 1.520 0.114 3.68 0.00024 ***
## Cytogenetics.inv3_t3_3 1.189 3.284 0.312 3.81 0.00014 ***
## Cytogenetics.minus5_5q 0.547 1.729 0.191 2.87 0.00416 **
## Cytogenetics.minus7_7q 0.245 1.278 0.164 1.50 0.13416
## Cytogenetics.mono17_17p_abn17p 0.422 1.526 0.198 2.13 0.03291 *
## Cytogenetics.plus21 0.484 1.622 0.231 2.10 0.03593 *
## Cytogenetics.t_15_17 -2.147 0.117 0.272 -7.89 2.9e-15 ***
## Cytogenetics.t_8_21 -0.903 0.405 0.193 -4.69 2.8e-06 ***
## Cytogenetics.inv16_t16_16 -0.879 0.415 0.264 -3.33 0.00087 ***
## Cytogenetics.abn3q_other 0.231 1.260 0.246 0.94 0.34671
## Treatment.VPA 0.106 1.112 0.145 0.74 0.46188
## Treatment.TPL -1.542 0.214 0.135 -11.39 < 2e-16 ***
## Clinical.AOD 0.730 2.075 0.326 2.24 0.02505 *
## Clinical.gender -0.164 0.849 0.077 -2.13 0.03352 *
## Clinical.PB_Blasts 0.428 1.534 0.138 3.11 0.00187 **
## Clinical.wbc 2.883 17.864 0.915 3.15 0.00163 **
## GeneGene.IDH2.DNMT3A 0.340 1.405 0.183 1.85 0.06382 .
## GeneGene.FLT3_ITD.DNMT3A 0.536 1.709 0.166 3.23 0.00125 **
## GeneGene.FLT3_TKD.NPM1 -0.461 0.630 0.264 -1.75 0.08042 .
## GeneTreat.IDH2.TPL -1.138 0.320 0.469 -2.43 0.01513 *
## GeneTreat.NPM1.tr0704 -0.260 0.771 0.156 -1.66 0.09647 .
## GeneTreat.NPM1.TPL 0.659 1.933 0.232 2.85 0.00443 **
## GeneTreat.NRAS.tr0704 -0.430 0.650 0.203 -2.12 0.03424 *
## GeneTreat.SFRS2.tr0704 0.183 1.201 0.275 0.67 0.50539
## CytoTreat.inv16_t16_16.tr0704 -1.147 0.318 0.447 -2.56 0.01033 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Genetics.CEBPA 0.427 2.340 0.3087 0.592
## Genetics.NPM1 0.437 2.290 0.3350 0.569
## Genetics.NRAS 1.249 0.801 0.9522 1.639
## Genetics.SF3B1 1.306 0.766 0.8254 2.066
## Genetics.SFRS2 1.846 0.542 1.2512 2.724
## Genetics.TP53 1.233 0.811 0.8190 1.857
## Genetics.FLT3_ITD 1.520 0.658 1.2159 1.900
## Cytogenetics.inv3_t3_3 3.284 0.305 1.7812 6.053
## Cytogenetics.minus5_5q 1.729 0.578 1.1889 2.514
## Cytogenetics.minus7_7q 1.278 0.782 0.9271 1.762
## Cytogenetics.mono17_17p_abn17p 1.526 0.655 1.0349 2.250
## Cytogenetics.plus21 1.622 0.617 1.0323 2.549
## Cytogenetics.t_15_17 0.117 8.559 0.0686 0.199
## Cytogenetics.t_8_21 0.405 2.467 0.2778 0.591
## Cytogenetics.inv16_t16_16 0.415 2.408 0.2476 0.697
## Cytogenetics.abn3q_other 1.260 0.793 0.7784 2.041
## Treatment.VPA 1.112 0.899 0.8376 1.477
## Treatment.TPL 0.214 4.674 0.1641 0.279
## Clinical.AOD 2.075 0.482 1.0958 3.930
## Clinical.gender 0.849 1.178 0.7301 0.987
## Clinical.PB_Blasts 1.534 0.652 1.1716 2.010
## Clinical.wbc 17.864 0.056 2.9727 107.348
## GeneGene.IDH2.DNMT3A 1.405 0.712 0.9807 2.012
## GeneGene.FLT3_ITD.DNMT3A 1.709 0.585 1.2344 2.367
## GeneGene.FLT3_TKD.NPM1 0.630 1.586 0.3758 1.057
## GeneTreat.IDH2.TPL 0.320 3.122 0.1278 0.803
## GeneTreat.NPM1.tr0704 0.771 1.297 0.5677 1.048
## GeneTreat.NPM1.TPL 1.933 0.517 1.2278 3.044
## GeneTreat.NRAS.tr0704 0.650 1.538 0.4366 0.969
## GeneTreat.SFRS2.tr0704 1.201 0.833 0.7004 2.060
## CytoTreat.inv16_t16_16.tr0704 0.318 3.148 0.1322 0.763
##
## Concordance= 0.756 (se = 0.011 )
## Rsquare= 0.438 (max possible= 1 )
## Likelihood ratio test= 598 on 31 df, p=0
## Wald test = 544 on 31 df, p=0
## Score (logrank) test = 636 on 31 df, p=0
totalRiskStabTest <- as.matrix(dataFrame[!testIx, ][, names(selected)]) %*% coxFit$coefficients
survConcordance(survival[!testIx] ~ totalRiskStabTest)
## Call:
## survConcordance(formula = survival[!testIx] ~ totalRiskStabTest)
##
## n=512 (11 observations deleted due to missingness)
## Concordance= 0.7601 se= 0.0158
## concordant discordant tied.risk tied.time std(c-d)
## 92204 29098 0 496 3834
plot(survfit(survival ~ FLT3_ITD + DNMT3A, data = dataList$Genetics), col = c("grey", brewer.pal(3, "Set1")))
legend("topright", bty = "n", lty = 1, col = c("grey", brewer.pal(3, "Set1")), c("WT/WT", "WT/DNMT3A-", "FLT3-/WT", "FLT3-/DNMT3A-"))
Probably effect of time-dep TPL:
plot(survfit(survival ~ Genetics.TP53 + Treatment.TPL, data = dataFrame), col = c("grey", brewer.pal(3, "Set1")))
legend("topright", bty = "n", lty = 1, col = c("grey", brewer.pal(3, "Set1")), c("WT/TPL-", "WT/TPL+", "TP53-/TPL-", "TP53-/TPL+"))
source("/Users/mg14/Git/Projects/CoxHD/CoxHD/R/functions.R")
simSurvNonp
## function(risk, H0, surv, cens.frac=mean(surv[,2]==0, na.rm=TRUE), ...) {
## #hazardDist <- loess(H0$time ~ H0$hazard, control = loess.control(surface = "direct"), ...)
## hazardDist <- splinefun(H0$hazard, H0$time, method="monoH.FC")
## n = length(risk)
## deathTimes = hazardDist(rexp(n, exp(risk))) #predict(hazardDist, rexp(n, exp(risk)))
## censDist <- loess(sort(na.omit(surv[surv[,2]==0,1])) ~ seq(0,1, length=sum(surv[,2]==0, na.rm=TRUE)), ...)
## censTimes <- predict(censDist, runif(n, 0,1))
## #censDist <- splinefun(sort(na.omit(surv[surv[,2]==0,1])) ~ seq(0,1, length=sum(surv[,2]==0, na.rm=TRUE)), method="monoH.FC")
## #censTimes <- censDist(runif(n,0,1))
## surv = Surv(time = pmax(0,pmin(deathTimes, censTimes)), event=(deathTimes < censTimes)+0)
## return(surv)
## }
set.seed(42)
partRisk <- PartialRisk(coxRFXFit)
totalRisk <- rowSums(partRisk)
simSurvival <- simSurvNonp(totalRisk, basehaz(coxRFXFit, centered = FALSE), survival, span = 0.1)
plot(survfit(simSurvival ~ 1))
lines(survfit(survival ~ 1), col = "red")
survConcordance(simSurvival[testIx] ~ totalRisk[testIx])
## Call:
## survConcordance(formula = simSurvival[testIx] ~ totalRisk[testIx])
##
## n= 1056
## Concordance= 0.7417 se= 0.01127
## concordant discordant tied.risk tied.time std(c-d)
## 363347 126565 0 0 11047
survConcordance(simSurvival[testIx][-na.action(coxFit)] ~ predict(coxFit))
## Call:
## survConcordance(formula = simSurvival[testIx][-na.action(coxFit)] ~
## predict(coxFit))
##
## n= 1036
## Concordance= 0.7271 se= 0.01139
## concordant discordant tied.risk tied.time std(c-d)
## 342328 128472 0 0 10729
simCoxRFXFit <- CoxRFX(dataFrame[, whichRFX], simSurvival, groups = groups[whichRFX], sigma0 = 0.1)
par(mar = c(5, 7, 1, 1))
boxplot(coef(simCoxRFXFit) ~ groups[whichRFX], border = set1, horizontal = TRUE, las = 2)
abline(v = 0)
plot(coef(simCoxRFXFit), coef(coxRFXFit), col = set1[groups[whichRFX]])
simPredictedRisk <- PredictRiskMissing(simCoxRFXFit)
plot(totalRisk, simPredictedRisk[, 1], xlab = "True risk (simulated)", ylab = "Estimated risk")
segments(totalRisk, simPredictedRisk[, 1] + sqrt(simPredictedRisk[, 2]), totalRisk, simPredictedRisk[, 1] - sqrt(simPredictedRisk[,
2]), col = "#00000022")
abline(0, 1, col = "red")
par(mfrow = c(3, 3))
p <- PartialRisk(simCoxRFXFit)
v <- PartialRiskVar(simCoxRFXFit)
for (i in 1:8) {
plot(partRisk[, i], p[, i], main = colnames(partRisk)[i], xlab = "True risk component (simulated)", ylab = "Estimated risk component")
segments(partRisk[, i], p[, i] + sqrt(v[, i]), partRisk[, i], p[, i] - sqrt(v[, i]), col = "#00000022")
abline(0, 1, col = "red")
}
Stability selection with simulated survival
set.seed(42)
s <- simSurvival
s[s[, 1] == 0, 1] <- 0.5 #Machine$double.eps
simStabCox <- StabCox(dataFrame[!is.na(survival), ], s[!is.na(survival)], bootstrap.samples = 50, control = "BH", level = 0.1)
## ....
## Warning: from glmnet Fortran code - Convergence for 246th lambda value not reached after maxit=100000 iterations;
## solutions for larger lambdas returned
## x......
## Warning: from glmnet Fortran code - Convergence for 249th lambda value not reached after maxit=100000 iterations;
## solutions for larger lambdas returned
## X
## Warning: from glmnet Fortran code - Convergence for 250th lambda value not reached after maxit=100000 iterations;
## solutions for larger lambdas returned
## X.....................................
## Warning: Number of samples smaller than number of bootstrap samples. Some have failed
Plots
plot(simStabCox)
plot(simStabCox$Pi, stabCox$Pi)
Selected variables
selected <- which(simStabCox$Pi > 0.8)
selected
## Genetics.ASXL1 Genetics.CEBPA Genetics.DNMT3A
## 1 8 11
## Genetics.IDH1 Genetics.KRAS Genetics.NPM1
## 18 25 33
## Genetics.PTPN11 Genetics.RUNX1 Genetics.SFRS2
## 38 41 45
## Genetics.TP53 Genetics.WT1 Genetics.FLT3_ITD
## 49 52 54
## Cytogenetics.inv3_t3_3 Cytogenetics.minus5_5q Cytogenetics.minus7_7q
## 59 61 62
## Cytogenetics.plus8_8q Cytogenetics.plus13 Cytogenetics.mono17_17p_abn17p
## 63 66 67
## Cytogenetics.minusY Cytogenetics.t_15_17 Cytogenetics.t_8_21
## 72 73 74
## Cytogenetics.inv16_t16_16 Cytogenetics.abn3q_other Treatment.TPL
## 75 77 84
## Clinical.gender Clinical.PB_Blasts Clinical.tAML
## 86 89 96
## GeneGene.NRAS.NPM1 GeneGene.FLT3_ITD.NRAS GeneCyto.NRAS.inv16_t16_16
## 308 671 1045
## GeneTreat.CEBPA.tr0704 GeneTreat.NPM1.tr0704
## 1290 1391
simCoxFit <- coxph(simSurvival[testIx] ~ ., data = dataFrame[testIx, ][, names(selected)])
summary(simCoxFit)
## Call:
## coxph(formula = simSurvival[testIx] ~ ., data = dataFrame[testIx,
## ][, names(selected)])
##
## n= 1056, number of events= 731
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Genetics.ASXL1 0.3006 1.3506 0.1696 1.77 0.0764 .
## Genetics.CEBPA -0.3872 0.6790 0.2343 -1.65 0.0984 .
## Genetics.DNMT3A 0.1712 1.1867 0.1023 1.67 0.0944 .
## Genetics.IDH1 0.1049 1.1106 0.1552 0.68 0.4991
## Genetics.KRAS 0.2957 1.3440 0.1572 1.88 0.0600 .
## Genetics.NPM1 -0.6496 0.5222 0.1365 -4.76 1.9e-06 ***
## Genetics.PTPN11 0.3595 1.4326 0.1432 2.51 0.0121 *
## Genetics.RUNX1 0.2031 1.2252 0.1328 1.53 0.1261
## Genetics.SFRS2 0.4302 1.5376 0.1587 2.71 0.0067 **
## Genetics.TP53 0.0811 1.0845 0.1948 0.42 0.6773
## Genetics.WT1 0.1811 1.1986 0.1978 0.92 0.3599
## Genetics.FLT3_ITD 0.4354 1.5456 0.1043 4.17 3.0e-05 ***
## Cytogenetics.inv3_t3_3 1.6900 5.4197 0.2945 5.74 9.6e-09 ***
## Cytogenetics.minus5_5q 0.4355 1.5457 0.1788 2.44 0.0149 *
## Cytogenetics.minus7_7q 0.0353 1.0359 0.1611 0.22 0.8267
## Cytogenetics.plus8_8q 0.1668 1.1815 0.1297 1.29 0.1985
## Cytogenetics.plus13 0.6874 1.9885 0.2693 2.55 0.0107 *
## Cytogenetics.mono17_17p_abn17p 0.8151 2.2595 0.2007 4.06 4.9e-05 ***
## Cytogenetics.minusY -0.1522 0.8588 0.2717 -0.56 0.5755
## Cytogenetics.t_15_17 -2.1987 0.1109 0.2929 -7.51 6.0e-14 ***
## Cytogenetics.t_8_21 -1.0047 0.3661 0.2486 -4.04 5.3e-05 ***
## Cytogenetics.inv16_t16_16 -0.8021 0.4484 0.2519 -3.18 0.0014 **
## Cytogenetics.abn3q_other 0.4581 1.5811 0.2495 1.84 0.0664 .
## Treatment.TPL -1.4744 0.2289 0.1192 -12.37 < 2e-16 ***
## Clinical.gender -0.1190 0.8878 0.0820 -1.45 0.1465
## Clinical.PB_Blasts 0.6518 1.9190 0.1322 4.93 8.2e-07 ***
## Clinical.tAML 0.3552 1.4265 0.1832 1.94 0.0525 .
## GeneGene.NRAS.NPM1 -0.5431 0.5809 0.2250 -2.41 0.0158 *
## GeneGene.FLT3_ITD.NRAS 0.9619 2.6166 0.3408 2.82 0.0048 **
## GeneCyto.NRAS.inv16_t16_16 -1.0290 0.3574 0.4362 -2.36 0.0183 *
## GeneTreat.CEBPA.tr0704 -0.4523 0.6362 0.3075 -1.47 0.1413
## GeneTreat.NPM1.tr0704 -0.2372 0.7888 0.1562 -1.52 0.1289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Genetics.ASXL1 1.351 0.740 0.9686 1.883
## Genetics.CEBPA 0.679 1.473 0.4290 1.075
## Genetics.DNMT3A 1.187 0.843 0.9710 1.450
## Genetics.IDH1 1.111 0.900 0.8193 1.505
## Genetics.KRAS 1.344 0.744 0.9877 1.829
## Genetics.NPM1 0.522 1.915 0.3997 0.682
## Genetics.PTPN11 1.433 0.698 1.0819 1.897
## Genetics.RUNX1 1.225 0.816 0.9445 1.589
## Genetics.SFRS2 1.538 0.650 1.1266 2.099
## Genetics.TP53 1.084 0.922 0.7402 1.589
## Genetics.WT1 1.199 0.834 0.8134 1.766
## Genetics.FLT3_ITD 1.546 0.647 1.2598 1.896
## Cytogenetics.inv3_t3_3 5.420 0.185 3.0427 9.654
## Cytogenetics.minus5_5q 1.546 0.647 1.0887 2.194
## Cytogenetics.minus7_7q 1.036 0.965 0.7554 1.421
## Cytogenetics.plus8_8q 1.182 0.846 0.9163 1.524
## Cytogenetics.plus13 1.989 0.503 1.1731 3.371
## Cytogenetics.mono17_17p_abn17p 2.260 0.443 1.5248 3.348
## Cytogenetics.minusY 0.859 1.164 0.5042 1.463
## Cytogenetics.t_15_17 0.111 9.013 0.0625 0.197
## Cytogenetics.t_8_21 0.366 2.731 0.2249 0.596
## Cytogenetics.inv16_t16_16 0.448 2.230 0.2737 0.735
## Cytogenetics.abn3q_other 1.581 0.632 0.9695 2.578
## Treatment.TPL 0.229 4.368 0.1812 0.289
## Clinical.gender 0.888 1.126 0.7561 1.043
## Clinical.PB_Blasts 1.919 0.521 1.4810 2.486
## Clinical.tAML 1.426 0.701 0.9961 2.043
## GeneGene.NRAS.NPM1 0.581 1.721 0.3738 0.903
## GeneGene.FLT3_ITD.NRAS 2.617 0.382 1.3418 5.103
## GeneCyto.NRAS.inv16_t16_16 0.357 2.798 0.1520 0.840
## GeneTreat.CEBPA.tr0704 0.636 1.572 0.3482 1.162
## GeneTreat.NPM1.tr0704 0.789 1.268 0.5807 1.071
##
## Concordance= 0.734 (se = 0.011 )
## Rsquare= 0.392 (max possible= 1 )
## Likelihood ratio test= 525 on 32 df, p=0
## Wald test = 474 on 32 df, p=0
## Score (logrank) test = 553 on 32 df, p=0
Only include interaction terms when main effects are present. On the other hand, the exclusive selection of a product term could mean that the most likely explanation is that the two main terms are zero and only the interaction is non-zero..
StabCoxInteractions
## function(X, surv, ...){
## s1 <- StabCox(X, surv, level=0.05, control="none",...)
## w <- which(s1$Pi > s1$pi.thr)
## I <- makeInteractions(X[,w],X[,w])[,as.vector(upper.tri(matrix(0,ncol=length(w), nrow=length(w))))]
## I <- I[,colSums(I) > 0]
## Z <- cbind(X[,w], I)
## StabCox(Z, surv, penalty.factor = c(rep(0, length(w)), rep(1, ncol(Z)-length(w))), control = "BH", ...)
## }
set.seed(42)
simStabCoxInt <- StabCoxInteractions(dataFrame[!is.na(survival), groups %in% c("Genetics", "Clinical", "Treatment", "Cytogenetics")],
s[!is.na(survival)], bootstrap.samples = 50)
## ..................................................
## Error: could not find function "makeInteractions"
s <- survival
s[s[, 1] == 0, 1] <- 0.5 #Machine$double.eps
stabCoxInt <- StabCoxInteractions(dataFrame[!is.na(survival), groups %in% c("Genetics", "Clinical", "Treatment", "Cytogenetics")],
s[!is.na(survival)], bootstrap.samples = 50)
## ..
## Warning: from glmnet Fortran code - Convergence for 130th lambda value not reached after maxit=100000 iterations;
## solutions for larger lambdas returned
## x...............................................
## Warning: Number of samples smaller than number of bootstrap samples. Some have failed
## Error: could not find function "makeInteractions"
s <- strsplit(gsub("(Genetics|Cytogenetics|Treatment|Clinical).", "", names(stabCoxInt$Pi)), "\\.")
## Error: object 'stabCoxInt' not found
m <- sapply(s, function(x) grep(paste(paste(x, collapse = "."), paste(rev(x), collapse = "."), sep = "|"), colnames(dataFrame))[1])
Plots
plot(stabCoxInt)
## Error: object 'stabCoxInt' not found
plot(stabCoxInt$Pi, stabCox$Pi[m])
## Error: object 'stabCoxInt' not found
plot(simStabCoxInt)
## Error: object 'simStabCoxInt' not found
plot(stabCoxInt$Pi, simStabCoxInt$Pi[names(stabCoxInt$Pi)])
## Error: object 'stabCoxInt' not found
Selected variables
selected <- which(simStabCoxInt$Pi > 0.8)
## Error: object 'simStabCoxInt' not found
selected
## Genetics.ASXL1 Genetics.CEBPA Genetics.DNMT3A
## 1 8 11
## Genetics.IDH1 Genetics.KRAS Genetics.NPM1
## 18 25 33
## Genetics.PTPN11 Genetics.RUNX1 Genetics.SFRS2
## 38 41 45
## Genetics.TP53 Genetics.WT1 Genetics.FLT3_ITD
## 49 52 54
## Cytogenetics.inv3_t3_3 Cytogenetics.minus5_5q Cytogenetics.minus7_7q
## 59 61 62
## Cytogenetics.plus8_8q Cytogenetics.plus13 Cytogenetics.mono17_17p_abn17p
## 63 66 67
## Cytogenetics.minusY Cytogenetics.t_15_17 Cytogenetics.t_8_21
## 72 73 74
## Cytogenetics.inv16_t16_16 Cytogenetics.abn3q_other Treatment.TPL
## 75 77 84
## Clinical.gender Clinical.PB_Blasts Clinical.tAML
## 86 89 96
## GeneGene.NRAS.NPM1 GeneGene.FLT3_ITD.NRAS GeneCyto.NRAS.inv16_t16_16
## 308 671 1045
## GeneTreat.CEBPA.tr0704 GeneTreat.NPM1.tr0704
## 1290 1391
simCoxFit <- coxph(simSurvival[testIx] ~ ., data = simStabCoxInt[testIx, ][, names(selected)])
## Error: object 'simStabCoxInt' not found
summary(simCoxFit)
## Call:
## coxph(formula = simSurvival[testIx] ~ ., data = dataFrame[testIx,
## ][, names(selected)])
##
## n= 1056, number of events= 731
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Genetics.ASXL1 0.3006 1.3506 0.1696 1.77 0.0764 .
## Genetics.CEBPA -0.3872 0.6790 0.2343 -1.65 0.0984 .
## Genetics.DNMT3A 0.1712 1.1867 0.1023 1.67 0.0944 .
## Genetics.IDH1 0.1049 1.1106 0.1552 0.68 0.4991
## Genetics.KRAS 0.2957 1.3440 0.1572 1.88 0.0600 .
## Genetics.NPM1 -0.6496 0.5222 0.1365 -4.76 1.9e-06 ***
## Genetics.PTPN11 0.3595 1.4326 0.1432 2.51 0.0121 *
## Genetics.RUNX1 0.2031 1.2252 0.1328 1.53 0.1261
## Genetics.SFRS2 0.4302 1.5376 0.1587 2.71 0.0067 **
## Genetics.TP53 0.0811 1.0845 0.1948 0.42 0.6773
## Genetics.WT1 0.1811 1.1986 0.1978 0.92 0.3599
## Genetics.FLT3_ITD 0.4354 1.5456 0.1043 4.17 3.0e-05 ***
## Cytogenetics.inv3_t3_3 1.6900 5.4197 0.2945 5.74 9.6e-09 ***
## Cytogenetics.minus5_5q 0.4355 1.5457 0.1788 2.44 0.0149 *
## Cytogenetics.minus7_7q 0.0353 1.0359 0.1611 0.22 0.8267
## Cytogenetics.plus8_8q 0.1668 1.1815 0.1297 1.29 0.1985
## Cytogenetics.plus13 0.6874 1.9885 0.2693 2.55 0.0107 *
## Cytogenetics.mono17_17p_abn17p 0.8151 2.2595 0.2007 4.06 4.9e-05 ***
## Cytogenetics.minusY -0.1522 0.8588 0.2717 -0.56 0.5755
## Cytogenetics.t_15_17 -2.1987 0.1109 0.2929 -7.51 6.0e-14 ***
## Cytogenetics.t_8_21 -1.0047 0.3661 0.2486 -4.04 5.3e-05 ***
## Cytogenetics.inv16_t16_16 -0.8021 0.4484 0.2519 -3.18 0.0014 **
## Cytogenetics.abn3q_other 0.4581 1.5811 0.2495 1.84 0.0664 .
## Treatment.TPL -1.4744 0.2289 0.1192 -12.37 < 2e-16 ***
## Clinical.gender -0.1190 0.8878 0.0820 -1.45 0.1465
## Clinical.PB_Blasts 0.6518 1.9190 0.1322 4.93 8.2e-07 ***
## Clinical.tAML 0.3552 1.4265 0.1832 1.94 0.0525 .
## GeneGene.NRAS.NPM1 -0.5431 0.5809 0.2250 -2.41 0.0158 *
## GeneGene.FLT3_ITD.NRAS 0.9619 2.6166 0.3408 2.82 0.0048 **
## GeneCyto.NRAS.inv16_t16_16 -1.0290 0.3574 0.4362 -2.36 0.0183 *
## GeneTreat.CEBPA.tr0704 -0.4523 0.6362 0.3075 -1.47 0.1413
## GeneTreat.NPM1.tr0704 -0.2372 0.7888 0.1562 -1.52 0.1289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Genetics.ASXL1 1.351 0.740 0.9686 1.883
## Genetics.CEBPA 0.679 1.473 0.4290 1.075
## Genetics.DNMT3A 1.187 0.843 0.9710 1.450
## Genetics.IDH1 1.111 0.900 0.8193 1.505
## Genetics.KRAS 1.344 0.744 0.9877 1.829
## Genetics.NPM1 0.522 1.915 0.3997 0.682
## Genetics.PTPN11 1.433 0.698 1.0819 1.897
## Genetics.RUNX1 1.225 0.816 0.9445 1.589
## Genetics.SFRS2 1.538 0.650 1.1266 2.099
## Genetics.TP53 1.084 0.922 0.7402 1.589
## Genetics.WT1 1.199 0.834 0.8134 1.766
## Genetics.FLT3_ITD 1.546 0.647 1.2598 1.896
## Cytogenetics.inv3_t3_3 5.420 0.185 3.0427 9.654
## Cytogenetics.minus5_5q 1.546 0.647 1.0887 2.194
## Cytogenetics.minus7_7q 1.036 0.965 0.7554 1.421
## Cytogenetics.plus8_8q 1.182 0.846 0.9163 1.524
## Cytogenetics.plus13 1.989 0.503 1.1731 3.371
## Cytogenetics.mono17_17p_abn17p 2.260 0.443 1.5248 3.348
## Cytogenetics.minusY 0.859 1.164 0.5042 1.463
## Cytogenetics.t_15_17 0.111 9.013 0.0625 0.197
## Cytogenetics.t_8_21 0.366 2.731 0.2249 0.596
## Cytogenetics.inv16_t16_16 0.448 2.230 0.2737 0.735
## Cytogenetics.abn3q_other 1.581 0.632 0.9695 2.578
## Treatment.TPL 0.229 4.368 0.1812 0.289
## Clinical.gender 0.888 1.126 0.7561 1.043
## Clinical.PB_Blasts 1.919 0.521 1.4810 2.486
## Clinical.tAML 1.426 0.701 0.9961 2.043
## GeneGene.NRAS.NPM1 0.581 1.721 0.3738 0.903
## GeneGene.FLT3_ITD.NRAS 2.617 0.382 1.3418 5.103
## GeneCyto.NRAS.inv16_t16_16 0.357 2.798 0.1520 0.840
## GeneTreat.CEBPA.tr0704 0.636 1.572 0.3482 1.162
## GeneTreat.NPM1.tr0704 0.789 1.268 0.5807 1.071
##
## Concordance= 0.734 (se = 0.011 )
## Rsquare= 0.392 (max possible= 1 )
## Likelihood ratio test= 525 on 32 df, p=0
## Wald test = 474 on 32 df, p=0
## Score (logrank) test = 553 on 32 df, p=0
Simulate data
set.seed(42)
simDataNonp
## function(oldData, nData, percentMissing = 0.33, ...){
## require(mice)
## oldData <- oldData[!apply(is.na(oldData), 1, all),]
## for(i in 1: nrow(oldData)){
## while(TRUE){
## naIdx <- sample(ncol(oldData), round(percentMissing * ncol(oldData)))
## if(! all(1:ncol(oldData) %in% naIdx))
## break
## }
## oldData[i,naIdx] <- NA
## }
## #oldData <- as.matrix(oldData[sample(nrow(oldData), nData, replace=TRUE),])
## m <- mice(as.data.frame(oldData), printFlag=FALSE, ...)
## newData <- complete(m, action="long")
## newData <- newData[sample(nrow(newData), nData, replace=nrow(newData)< nData),-2:-1]
## return(newData)
## }
data <- data.frame(Clinical = dataList$Clinical, Genetics = dataList$Genetics, Cytogenetics = dataList$Cytogenetics, Treatment = dataList$Treatment)
# simData <- simDataNonp(data, nData = 10000, m=5) simData <- simDataNonp(data, nData = nrow(data))
# save(file='simData.RData', simData)
load(file = "simData.RData")
simData$Genetics.FLT3 <- NULL
names(simData) <- names(data)
g <- sub("\\..+", "", colnames(data))
simDataFrame <- data.frame(simData, GeneGene = makeInteractions(simData[, g == "Genetics"], simData[, g == "Genetics"])[,
as.vector(upper.tri(matrix(0, ncol = sum(g == "Genetics"), nrow = sum(g == "Genetics"))))], GeneCyto = makeInteractions(simData[,
g == "Genetics"], simData[, g == "Cytogenetics"]), GeneTreat = makeInteractions(simData[, g == "Genetics"], simData[,
g == "Treatment"]), CytoTreat = makeInteractions(simData[, g == "Cytogenetics"], simData[, g == "Treatment"]))
colnames(simDataFrame) <- gsub("\\.(Genetics|Treatment|Cytogenetics)", "", colnames(simDataFrame))
simDataFrame <- simDataFrame[, colnames(dataFrame)]
for (n in names(simDataFrame)) if (any(is.na(simDataFrame[[n]]))) simDataFrame[[n]] <- poorMansImpute(simDataFrame[[n]])
simDataFrame <- StandardizeMagnitude(simDataFrame)
# simDataFrameTD <- simDataFrame[tplSplit,] simDataFrameTD[which(tplIndex), grep('TPL', colnames(simDataFrameTD),
# value=TRUE)] <- 0 ## Set pre-tpl variables to zero
Coefficients
set.seed(42)
v <- coxRFXFit$sigma2 * coxRFXFit$df[-8]/as.numeric(table(groups[whichRFXTD]) - 1)
v["GeneGene"] <- v["GeneTreat"] <- v["CytoTreat"] <- v["GeneCyto"] <- 0.1
simCoef <- numeric(ncol(simDataFrame))
names(simCoef) <- colnames(simDataFrame)
simCoef[whichRFXTD] <- rnorm(length(groups[whichRFXTD]), coxRFXFit$mu[groups[whichRFXTD]], sqrt(v[groups[whichRFXTD]]))
Survival
set.seed(42)
simDataRisk <- (as.matrix(simDataFrame) %*% simCoef)[, 1]
simDataRisk <- simDataRisk - mean(simDataRisk)
simDataSurvival <- simSurvNonp(simDataRisk, basehaz(coxRFXFit, centered = FALSE), survival, span = 0.1)
plot(survfit(simDataSurvival ~ Genetics.EP300, data = simDataFrame))
simDataCoxRFXFit <- CoxRFX(simDataFrame[, names(coef(coxRFXFit))], simDataSurvival, groups = groups[whichRFXTD], sigma0 = 0.1)
plot(simCoef[whichRFXTD], coef(simDataCoxRFXFit), col = set1[groups[whichRFXTD]])
plot(sapply(split(simCoef[whichRFXTD], groups[whichRFXTD]), var), simDataCoxRFXFit$sigma2, col = set1, pch = 19)
plot(sapply(split(simCoef[whichRFXTD], groups[whichRFXTD]), mean), simDataCoxRFXFit$mu, col = set1, pch = 19)
set.seed(42)
s <- simDataSurvival
s[s[, 1] == 0, 1] <- .Machine$double.eps
simDataStabCox <- StabCox(simDataFrame[!is.na(survival), ], s[!is.na(survival)], bootstrap.samples = 50, control = "BH",
level = 0.1)
## Warning: from glmnet Fortran code - Numerical error at 173th lambda value; solutions for larger values of lambda
## returned
## ..................................................
# save(simDataStabCox, file='simDataStabCox')
Plots
plot(simDataStabCox)
plot(simDataStabCox$Pi, stabCox$Pi)
plot(simCoef, simDataStabCox$Pi)
plot(colMeans(simDataFrame), simDataStabCox$Pi, cex = sqrt(abs(simCoef)) * 2 + 0.1, log = "x")
## Warning: 65 x values <= 0 omitted from logarithmic plot
Selected variables
selected <- which(simDataStabCox$Pi > 0.8)
selected
## Genetics.CREBBP Genetics.DNMT3A Genetics.EP300
## 9 11 12
## Genetics.FBXW7 Genetics.IDH1 Genetics.IDH2
## 15 18 19
## Genetics.KRAS Genetics.MYC Genetics.NPM1
## 25 31 33
## Genetics.RAD21 Genetics.STAG2 Genetics.TET2
## 39 47 48
## Genetics.TP53 Genetics.U2AF1 Genetics.FLT3_ITD
## 49 50 54
## Genetics.FLT3_TKD Cytogenetics.MLL_PTD Cytogenetics.t_MLL
## 55 57 58
## Cytogenetics.inv3_t3_3 Cytogenetics.plus8_8q Cytogenetics.minus9q
## 59 63 64
## Cytogenetics.plus13 Cytogenetics.minus18_18q Cytogenetics.minus20_20q
## 66 68 69
## Cytogenetics.plus21 Cytogenetics.plus22 Cytogenetics.t_15_17
## 70 71 73
## Cytogenetics.t_8_21 Cytogenetics.t_6_9 Cytogenetics.abn3q_other
## 74 76 77
## Cytogenetics.mono4_4q_abn4q Treatment.tr98B Treatment.tr0704
## 79 80 81
## Treatment.ATRA Treatment.VPA Treatment.TPL
## 82 83 84
## Clinical.gender Clinical.Performance_ECOG Clinical.BM_Blasts
## 86 87 88
## Clinical.PB_Blasts Clinical.LDH_ Clinical.HB
## 89 91 92
## Clinical.platelet Clinical.oAML Clinical.sAML
## 93 94 95
## GeneGene.GATA2.CEBPA GeneGene.KRAS.CEBPA GeneGene.NF1.DNMT3A
## 134 199 245
## GeneGene.NPM1.CEBPA GeneGene.NPM1.IDH2 GeneGene.NPM1.KRAS
## 262 270 274
## GeneGene.NPM1.MYC GeneGene.NRAS.DNMT3A GeneGene.NRAS.EZH2
## 278 288 291
## GeneGene.PTPN11.DNMT3A GeneGene.PTPN11.NPM1 GeneGene.PTPN11.NRAS
## 348 357 358
## GeneGene.RAD21.PTPN11 GeneGene.RUNX1.EZH2 GeneGene.RUNX1.IDH2
## 383 408 412
## GeneGene.RUNX1.NRAS GeneGene.SFRS2.ASXL1 GeneGene.SFRS2.IDH1
## 423 457 467
## GeneGene.TET2.DNMT3A GeneGene.TET2.KRAS GeneGene.TET2.NRAS
## 520 527 533
## GeneGene.TET2.SFRS2 GeneGene.TET2.STAG2 GeneGene.TP53.NRAS
## 541 542 565
## GeneGene.FLT3_ITD.CBL GeneGene.FLT3_ITD.CEBPA GeneGene.FLT3_ITD.PTPN11
## 648 649 674
## GeneGene.FLT3_TKD.IDH2 GeneGene.FLT3_TKD.RUNX1 GeneGene.FLT3_TKD.WT1
## 700 715 721
## GeneCyto.CEBPA.minus9q GeneCyto.CEBPA.plus21 GeneCyto.DNMT3A.MLL_PTD
## 801 803 815
## GeneCyto.DNMT3A.minus5_5q GeneCyto.EZH2.plus8_8q GeneCyto.IDH2.plus8_8q
## 818 860 893
## GeneCyto.KIT.t_8_21 GeneCyto.KIT.inv16_t16_16 GeneCyto.KRAS.minus7_7q
## 945 946 954
## GeneCyto.NPM1.plus8_8q GeneCyto.NRAS.MLL_PTD GeneCyto.NRAS.minus7_7q
## 1018 1028 1032
## GeneCyto.NRAS.plus8_8q GeneCyto.NRAS.plus22 GeneCyto.NRAS.minusY
## 1033 1041 1042
## GeneCyto.NRAS.inv16_t16_16 GeneCyto.RUNX1.MLL_PTD GeneCyto.RUNX1.minus7_7q
## 1045 1093 1097
## GeneCyto.RUNX1.plus13 GeneCyto.TP53.minus5_5q GeneCyto.TP53.mono17_17p_abn17p
## 1101 1176 1182
## GeneCyto.TP53.mono4_4q_abn4q GeneCyto.U2AF1.minus20_20q GeneCyto.WT1.plus8_8q
## 1192 1199 1207
## GeneCyto.FLT3_ITD.MLL_PTD GeneCyto.FLT3_ITD.minus9q GeneCyto.FLT3_ITD.t_15_17
## 1221 1226 1233
## GeneCyto.FLT3_TKD.minus7_7q GeneTreat.ASXL1.tr0704 GeneTreat.CBL.tr0704
## 1243 1264 1279
## GeneTreat.CEBPA.ATRA GeneTreat.CEBPA.VPA GeneTreat.CEBPA.TPL
## 1291 1292 1293
## GeneTreat.DNMT3A.tr0704 GeneTreat.IDH1.TPL GeneTreat.IDH2.tr0704
## 1304 1336 1338
## GeneTreat.IDH2.ATRA GeneTreat.KIT.tr0704 GeneTreat.MLL2.tr0704
## 1339 1360 1369
## GeneTreat.MYC.tr0704 GeneTreat.NPM1.VPA GeneTreat.NRAS.tr0704
## 1381 1393 1396
## GeneTreat.NRAS.TPL GeneTreat.PTPN11.tr0704 GeneTreat.PTPN11.ATRA
## 1399 1411 1412
## GeneTreat.PTPN11.TPL GeneTreat.RUNX1.ATRA GeneTreat.RUNX1.VPA
## 1414 1426 1427
## GeneTreat.RUNX1.TPL GeneTreat.SFRS2.tr0704 GeneTreat.SFRS2.VPA
## 1428 1438 1440
## GeneTreat.TET2.tr98B GeneTreat.TET2.ATRA GeneTreat.TET2.TPL
## 1448 1450 1452
## GeneTreat.TP53.ATRA GeneTreat.TP53.TPL GeneTreat.FLT3_ITD.tr98B
## 1455 1457 1475
## GeneTreat.FLT3_ITD.tr0704 GeneTreat.FLT3_ITD.ATRA GeneTreat.FLT3_ITD.VPA
## 1476 1477 1478
## GeneTreat.FLT3_TKD.ATRA CytoTreat.MLL_PTD.tr0704 CytoTreat.MLL_PTD.ATRA
## 1482 1491 1492
## CytoTreat.t_MLL.TPL CytoTreat.inv3_t3_3.tr0704 CytoTreat.minus5_5q.ATRA
## 1499 1501 1507
## CytoTreat.minus7_7q.tr0704 CytoTreat.plus8_8q.tr98B CytoTreat.plus8_8q.tr0704
## 1511 1515 1516
## CytoTreat.plus8_8q.ATRA CytoTreat.plus8_8q.TPL CytoTreat.minus9q.tr0704
## 1517 1519 1521
## CytoTreat.minus9q.ATRA CytoTreat.mono12_12p_abn12p.tr0704 CytoTreat.plus21.tr0704
## 1522 1526 1551
## CytoTreat.plus21.TPL CytoTreat.minusY.tr0704 CytoTreat.minusY.ATRA
## 1554 1561 1562
## CytoTreat.abn3q_other.tr0704 CytoTreat.plus11_11q.ATRA CytoTreat.mono4_4q_abn4q.tr0704
## 1581 1587 1591
simDataCoxFit <- coxph(simDataSurvival[testIx] ~ ., data = simDataFrame[testIx, ][, names(selected)])
summary(simDataCoxFit)
## Call:
## coxph(formula = simDataSurvival[testIx] ~ ., data = simDataFrame[testIx,
## ][, names(selected)])
##
## n= 6693, number of events= 4450
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Genetics.CREBBP 0.69808 2.00990 0.17052 4.09 4.2e-05 ***
## Genetics.DNMT3A 0.25421 1.28944 0.06842 3.72 0.00020 ***
## Genetics.EP300 1.13457 3.10985 0.13115 8.65 < 2e-16 ***
## Genetics.FBXW7 0.95854 2.60789 0.22463 4.27 2.0e-05 ***
## Genetics.IDH1 -0.63607 0.52937 0.07638 -8.33 < 2e-16 ***
## Genetics.IDH2 -0.17683 0.83792 0.11646 -1.52 0.12894
## Genetics.KRAS 0.27451 1.31589 0.10152 2.70 0.00685 **
## Genetics.MYC 0.63057 1.87867 0.18589 3.39 0.00069 ***
## Genetics.NPM1 0.26397 1.30209 0.04972 5.31 1.1e-07 ***
## Genetics.RAD21 -0.34740 0.70652 0.08504 -4.09 4.4e-05 ***
## Genetics.STAG2 -0.10648 0.89899 0.11339 -0.94 0.34768
## Genetics.TET2 0.45505 1.57626 0.08208 5.54 3.0e-08 ***
## Genetics.TP53 -0.31902 0.72686 0.22266 -1.43 0.15192
## Genetics.U2AF1 0.18051 1.19783 0.12207 1.48 0.13919
## Genetics.FLT3_ITD 0.21942 1.24535 0.06698 3.28 0.00105 **
## Genetics.FLT3_TKD 0.09165 1.09598 0.07676 1.19 0.23250
## Cytogenetics.MLL_PTD 0.28760 1.33322 0.13978 2.06 0.03964 *
## Cytogenetics.t_MLL 0.24676 1.27987 0.09131 2.70 0.00688 **
## Cytogenetics.inv3_t3_3 -0.70747 0.49289 0.30243 -2.34 0.01932 *
## Cytogenetics.plus8_8q 0.37756 1.45872 0.08982 4.20 2.6e-05 ***
## Cytogenetics.minus9q 0.54127 1.71819 0.15974 3.39 0.00070 ***
## Cytogenetics.plus13 0.83718 2.30985 0.16357 5.12 3.1e-07 ***
## Cytogenetics.minus18_18q 0.85377 2.34849 0.13872 6.15 7.5e-10 ***
## Cytogenetics.minus20_20q 0.22334 1.25025 0.13595 1.64 0.10042
## Cytogenetics.plus21 0.66747 1.94931 0.17448 3.83 0.00013 ***
## Cytogenetics.plus22 -0.41792 0.65841 0.13419 -3.11 0.00184 **
## Cytogenetics.t_15_17 0.36012 1.43349 0.08777 4.10 4.1e-05 ***
## Cytogenetics.t_8_21 -0.35597 0.70049 0.10451 -3.41 0.00066 ***
## Cytogenetics.t_6_9 0.38031 1.46274 0.14293 2.66 0.00779 **
## Cytogenetics.abn3q_other 0.70340 2.02060 0.11452 6.14 8.1e-10 ***
## Cytogenetics.mono4_4q_abn4q -0.84423 0.42989 0.18296 -4.61 3.9e-06 ***
## Treatment.tr98B -1.08013 0.33955 0.10017 -10.78 < 2e-16 ***
## Treatment.tr0704 0.71589 2.04600 0.05723 12.51 < 2e-16 ***
## Treatment.ATRA -0.20486 0.81476 0.05855 -3.50 0.00047 ***
## Treatment.VPA -0.21954 0.80289 0.07906 -2.78 0.00549 **
## Treatment.TPL -0.39675 0.67250 0.05756 -6.89 5.5e-12 ***
## Clinical.gender 0.38386 1.46794 0.03347 11.47 < 2e-16 ***
## Clinical.Performance_ECOG 0.16124 1.17496 0.02406 6.70 2.0e-11 ***
## Clinical.BM_Blasts 0.17872 1.19569 0.07625 2.34 0.01909 *
## Clinical.PB_Blasts 0.58754 1.79955 0.05979 9.83 < 2e-16 ***
## Clinical.LDH_ 0.58356 1.79240 0.22195 2.63 0.00856 **
## Clinical.HB 0.02832 1.02872 0.08178 0.35 0.72914
## Clinical.platelet 0.54743 1.72880 0.19222 2.85 0.00440 **
## Clinical.oAML 0.79409 2.21242 0.09514 8.35 < 2e-16 ***
## Clinical.sAML -0.12796 0.87989 0.09253 -1.38 0.16668
## GeneGene.GATA2.CEBPA -0.39427 0.67417 0.14502 -2.72 0.00655 **
## GeneGene.KRAS.CEBPA -1.09652 0.33403 0.27592 -3.97 7.1e-05 ***
## GeneGene.NF1.DNMT3A 0.68034 1.97454 0.18661 3.65 0.00027 ***
## GeneGene.NPM1.CEBPA -0.43980 0.64417 0.13080 -3.36 0.00077 ***
## GeneGene.NPM1.IDH2 -0.34151 0.71070 0.13366 -2.56 0.01062 *
## GeneGene.NPM1.KRAS -1.12372 0.32507 0.18411 -6.10 1.0e-09 ***
## GeneGene.NPM1.MYC -0.57135 0.56476 0.24174 -2.36 0.01810 *
## GeneGene.NRAS.DNMT3A 0.32955 1.39034 0.09690 3.40 0.00067 ***
## GeneGene.NRAS.EZH2 0.34154 1.40712 0.23019 1.48 0.13787
## GeneGene.PTPN11.DNMT3A 0.52964 1.69833 0.14495 3.65 0.00026 ***
## GeneGene.PTPN11.NPM1 -0.66921 0.51211 0.14565 -4.59 4.3e-06 ***
## GeneGene.PTPN11.NRAS -0.34303 0.70962 0.13963 -2.46 0.01402 *
## GeneGene.RAD21.PTPN11 -0.73565 0.47919 0.29374 -2.50 0.01226 *
## GeneGene.RUNX1.EZH2 0.46782 1.59650 0.18495 2.53 0.01143 *
## GeneGene.RUNX1.IDH2 -0.42620 0.65298 0.18429 -2.31 0.02074 *
## GeneGene.RUNX1.NRAS 0.52367 1.68821 0.14117 3.71 0.00021 ***
## GeneGene.SFRS2.ASXL1 -0.76419 0.46571 0.20158 -3.79 0.00015 ***
## GeneGene.SFRS2.IDH1 -0.64937 0.52237 0.22402 -2.90 0.00375 **
## GeneGene.TET2.DNMT3A 0.38531 1.47008 0.11047 3.49 0.00049 ***
## GeneGene.TET2.KRAS 0.81413 2.25721 0.20790 3.92 9.0e-05 ***
## GeneGene.TET2.NRAS -0.64407 0.52515 0.13419 -4.80 1.6e-06 ***
## GeneGene.TET2.SFRS2 -0.31145 0.73238 0.16636 -1.87 0.06118 .
## GeneGene.TET2.STAG2 -0.43007 0.65047 0.22799 -1.89 0.05925 .
## GeneGene.TP53.NRAS -0.80030 0.44920 0.25331 -3.16 0.00158 **
## GeneGene.FLT3_ITD.CBL -1.50971 0.22097 0.37865 -3.99 6.7e-05 ***
## GeneGene.FLT3_ITD.CEBPA 0.45943 1.58317 0.13248 3.47 0.00052 ***
## GeneGene.FLT3_ITD.PTPN11 -0.55284 0.57532 0.18891 -2.93 0.00343 **
## GeneGene.FLT3_TKD.IDH2 -1.08814 0.33684 0.33491 -3.25 0.00116 **
## GeneGene.FLT3_TKD.RUNX1 0.67888 1.97167 0.18117 3.75 0.00018 ***
## GeneGene.FLT3_TKD.WT1 0.52195 1.68532 0.18989 2.75 0.00598 **
## GeneCyto.CEBPA.minus9q 0.37065 1.44867 0.18179 2.04 0.04147 *
## GeneCyto.CEBPA.plus21 0.24639 1.27940 0.23425 1.05 0.29288
## GeneCyto.DNMT3A.MLL_PTD 0.34756 1.41561 0.16327 2.13 0.03328 *
## GeneCyto.DNMT3A.minus5_5q -0.93271 0.39349 0.23086 -4.04 5.3e-05 ***
## GeneCyto.EZH2.plus8_8q 0.15125 1.16329 0.20578 0.74 0.46234
## GeneCyto.IDH2.plus8_8q 0.44345 1.55807 0.18274 2.43 0.01524 *
## GeneCyto.KIT.t_8_21 -0.34580 0.70765 0.18026 -1.92 0.05507 .
## GeneCyto.KIT.inv16_t16_16 0.26177 1.29922 0.15067 1.74 0.08233 .
## GeneCyto.KRAS.minus7_7q -0.44901 0.63826 0.20785 -2.16 0.03075 *
## GeneCyto.NPM1.plus8_8q 0.62817 1.87418 0.12395 5.07 4.0e-07 ***
## GeneCyto.NRAS.MLL_PTD -0.54143 0.58192 0.17994 -3.01 0.00262 **
## GeneCyto.NRAS.minus7_7q -0.52269 0.59293 0.15945 -3.28 0.00105 **
## GeneCyto.NRAS.plus8_8q 0.67752 1.96899 0.13609 4.98 6.4e-07 ***
## GeneCyto.NRAS.plus22 -0.96500 0.38098 0.35947 -2.68 0.00726 **
## GeneCyto.NRAS.minusY -0.64231 0.52608 0.20078 -3.20 0.00138 **
## GeneCyto.NRAS.inv16_t16_16 0.14492 1.15595 0.09935 1.46 0.14464
## GeneCyto.RUNX1.MLL_PTD -0.51649 0.59661 0.14779 -3.49 0.00047 ***
## GeneCyto.RUNX1.minus7_7q -0.63149 0.53180 0.17687 -3.57 0.00036 ***
## GeneCyto.RUNX1.plus13 -1.36650 0.25500 0.33729 -4.05 5.1e-05 ***
## GeneCyto.TP53.minus5_5q -0.58828 0.55528 0.24878 -2.36 0.01804 *
## GeneCyto.TP53.mono17_17p_abn17p -0.27987 0.75588 0.18585 -1.51 0.13209
## GeneCyto.TP53.mono4_4q_abn4q -0.60505 0.54605 0.28787 -2.10 0.03557 *
## GeneCyto.U2AF1.minus20_20q 0.68838 1.99049 0.25389 2.71 0.00670 **
## GeneCyto.WT1.plus8_8q -0.42770 0.65200 0.16550 -2.58 0.00976 **
## GeneCyto.FLT3_ITD.MLL_PTD 0.42886 1.53551 0.14319 2.99 0.00274 **
## GeneCyto.FLT3_ITD.minus9q -0.86951 0.41916 0.21361 -4.07 4.7e-05 ***
## GeneCyto.FLT3_ITD.t_15_17 0.25254 1.28729 0.14535 1.74 0.08231 .
## GeneCyto.FLT3_TKD.minus7_7q -1.01410 0.36273 0.25507 -3.98 7.0e-05 ***
## GeneTreat.ASXL1.tr0704 -0.48899 0.61325 0.13223 -3.70 0.00022 ***
## GeneTreat.CBL.tr0704 0.57590 1.77873 0.12713 4.53 5.9e-06 ***
## GeneTreat.CEBPA.ATRA 0.77066 2.16120 0.10596 7.27 3.5e-13 ***
## GeneTreat.CEBPA.VPA 0.58298 1.79136 0.14996 3.89 0.00010 ***
## GeneTreat.CEBPA.TPL -0.20979 0.81076 0.14116 -1.49 0.13725
## GeneTreat.DNMT3A.tr0704 0.32373 1.38227 0.07728 4.19 2.8e-05 ***
## GeneTreat.IDH1.TPL -0.34411 0.70885 0.17746 -1.94 0.05249 .
## GeneTreat.IDH2.tr0704 -0.22210 0.80084 0.13547 -1.64 0.10112
## GeneTreat.IDH2.ATRA -0.35685 0.69988 0.14402 -2.48 0.01322 *
## GeneTreat.KIT.tr0704 -0.34561 0.70778 0.12663 -2.73 0.00635 **
## GeneTreat.MLL2.tr0704 -0.55483 0.57417 0.18809 -2.95 0.00318 **
## GeneTreat.MYC.tr0704 0.03828 1.03902 0.21543 0.18 0.85897
## GeneTreat.NPM1.VPA -0.26604 0.76641 0.11702 -2.27 0.02300 *
## GeneTreat.NRAS.tr0704 0.48251 1.62014 0.07523 6.41 1.4e-10 ***
## GeneTreat.NRAS.TPL -0.75537 0.46984 0.11620 -6.50 8.0e-11 ***
## GeneTreat.PTPN11.tr0704 0.51073 1.66651 0.10586 4.82 1.4e-06 ***
## GeneTreat.PTPN11.ATRA 0.65651 1.92806 0.11828 5.55 2.8e-08 ***
## GeneTreat.PTPN11.TPL 0.39841 1.48945 0.13162 3.03 0.00247 **
## GeneTreat.RUNX1.ATRA 0.36783 1.44460 0.12397 2.97 0.00301 **
## GeneTreat.RUNX1.VPA -0.63313 0.53093 0.17822 -3.55 0.00038 ***
## GeneTreat.RUNX1.TPL -0.14873 0.86180 0.13012 -1.14 0.25304
## GeneTreat.SFRS2.tr0704 0.11997 1.12746 0.10738 1.12 0.26388
## GeneTreat.SFRS2.VPA 1.00813 2.74046 0.24736 4.08 4.6e-05 ***
## GeneTreat.TET2.tr98B -0.15489 0.85651 0.16960 -0.91 0.36112
## GeneTreat.TET2.ATRA 0.12118 1.12882 0.11291 1.07 0.28318
## GeneTreat.TET2.TPL -0.70192 0.49563 0.12796 -5.49 4.1e-08 ***
## GeneTreat.TP53.ATRA -0.27251 0.76147 0.23562 -1.16 0.24744
## GeneTreat.TP53.TPL -0.96859 0.37962 0.30401 -3.19 0.00144 **
## GeneTreat.FLT3_ITD.tr98B -0.82057 0.44018 0.19506 -4.21 2.6e-05 ***
## GeneTreat.FLT3_ITD.tr0704 -0.51050 0.60019 0.09647 -5.29 1.2e-07 ***
## GeneTreat.FLT3_ITD.ATRA -0.55240 0.57557 0.10090 -5.47 4.4e-08 ***
## GeneTreat.FLT3_ITD.VPA 0.21556 1.24056 0.14384 1.50 0.13397
## GeneTreat.FLT3_TKD.ATRA -0.72408 0.48477 0.15180 -4.77 1.8e-06 ***
## CytoTreat.MLL_PTD.tr0704 0.57248 1.77266 0.15004 3.82 0.00014 ***
## CytoTreat.MLL_PTD.ATRA 0.11197 1.11848 0.15001 0.75 0.45542
## CytoTreat.t_MLL.TPL 0.58081 1.78748 0.17598 3.30 0.00097 ***
## CytoTreat.inv3_t3_3.tr0704 -1.24682 0.28742 0.34484 -3.62 0.00030 ***
## CytoTreat.minus5_5q.ATRA -0.42372 0.65461 0.24384 -1.74 0.08226 .
## CytoTreat.minus7_7q.tr0704 0.42804 1.53425 0.08977 4.77 1.9e-06 ***
## CytoTreat.plus8_8q.tr98B 0.79232 2.20852 0.18156 4.36 1.3e-05 ***
## CytoTreat.plus8_8q.tr0704 0.70726 2.02843 0.11564 6.12 9.6e-10 ***
## CytoTreat.plus8_8q.ATRA -0.32461 0.72281 0.12177 -2.67 0.00768 **
## CytoTreat.plus8_8q.TPL 0.45339 1.57364 0.11062 4.10 4.2e-05 ***
## CytoTreat.minus9q.tr0704 0.31240 1.36670 0.17423 1.79 0.07298 .
## CytoTreat.minus9q.ATRA 0.36636 1.44248 0.17165 2.13 0.03281 *
## CytoTreat.mono12_12p_abn12p.tr0704 0.71569 2.04560 0.10662 6.71 1.9e-11 ***
## CytoTreat.plus21.tr0704 0.46035 1.58463 0.19473 2.36 0.01807 *
## CytoTreat.plus21.TPL 0.43765 1.54907 0.19583 2.23 0.02542 *
## CytoTreat.minusY.tr0704 0.13824 1.14825 0.17164 0.81 0.42060
## CytoTreat.minusY.ATRA 0.43504 1.54502 0.18980 2.29 0.02190 *
## CytoTreat.abn3q_other.tr0704 0.17352 1.18949 0.16364 1.06 0.28897
## CytoTreat.plus11_11q.ATRA -0.59622 0.55089 0.16758 -3.56 0.00037 ***
## CytoTreat.mono4_4q_abn4q.tr0704 0.00591 1.00593 0.23418 0.03 0.97987
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Genetics.CREBBP 2.010 0.498 1.439 2.808
## Genetics.DNMT3A 1.289 0.776 1.128 1.474
## Genetics.EP300 3.110 0.322 2.405 4.021
## Genetics.FBXW7 2.608 0.383 1.679 4.050
## Genetics.IDH1 0.529 1.889 0.456 0.615
## Genetics.IDH2 0.838 1.193 0.667 1.053
## Genetics.KRAS 1.316 0.760 1.078 1.606
## Genetics.MYC 1.879 0.532 1.305 2.704
## Genetics.NPM1 1.302 0.768 1.181 1.435
## Genetics.RAD21 0.707 1.415 0.598 0.835
## Genetics.STAG2 0.899 1.112 0.720 1.123
## Genetics.TET2 1.576 0.634 1.342 1.851
## Genetics.TP53 0.727 1.376 0.470 1.125
## Genetics.U2AF1 1.198 0.835 0.943 1.522
## Genetics.FLT3_ITD 1.245 0.803 1.092 1.420
## Genetics.FLT3_TKD 1.096 0.912 0.943 1.274
## Cytogenetics.MLL_PTD 1.333 0.750 1.014 1.753
## Cytogenetics.t_MLL 1.280 0.781 1.070 1.531
## Cytogenetics.inv3_t3_3 0.493 2.029 0.272 0.892
## Cytogenetics.plus8_8q 1.459 0.686 1.223 1.740
## Cytogenetics.minus9q 1.718 0.582 1.256 2.350
## Cytogenetics.plus13 2.310 0.433 1.676 3.183
## Cytogenetics.minus18_18q 2.348 0.426 1.789 3.082
## Cytogenetics.minus20_20q 1.250 0.800 0.958 1.632
## Cytogenetics.plus21 1.949 0.513 1.385 2.744
## Cytogenetics.plus22 0.658 1.519 0.506 0.856
## Cytogenetics.t_15_17 1.433 0.698 1.207 1.703
## Cytogenetics.t_8_21 0.700 1.428 0.571 0.860
## Cytogenetics.t_6_9 1.463 0.684 1.105 1.936
## Cytogenetics.abn3q_other 2.021 0.495 1.614 2.529
## Cytogenetics.mono4_4q_abn4q 0.430 2.326 0.300 0.615
## Treatment.tr98B 0.340 2.945 0.279 0.413
## Treatment.tr0704 2.046 0.489 1.829 2.289
## Treatment.ATRA 0.815 1.227 0.726 0.914
## Treatment.VPA 0.803 1.246 0.688 0.937
## Treatment.TPL 0.672 1.487 0.601 0.753
## Clinical.gender 1.468 0.681 1.375 1.567
## Clinical.Performance_ECOG 1.175 0.851 1.121 1.232
## Clinical.BM_Blasts 1.196 0.836 1.030 1.388
## Clinical.PB_Blasts 1.800 0.556 1.601 2.023
## Clinical.LDH_ 1.792 0.558 1.160 2.769
## Clinical.HB 1.029 0.972 0.876 1.208
## Clinical.platelet 1.729 0.578 1.186 2.520
## Clinical.oAML 2.212 0.452 1.836 2.666
## Clinical.sAML 0.880 1.137 0.734 1.055
## GeneGene.GATA2.CEBPA 0.674 1.483 0.507 0.896
## GeneGene.KRAS.CEBPA 0.334 2.994 0.195 0.574
## GeneGene.NF1.DNMT3A 1.975 0.506 1.370 2.846
## GeneGene.NPM1.CEBPA 0.644 1.552 0.499 0.832
## GeneGene.NPM1.IDH2 0.711 1.407 0.547 0.924
## GeneGene.NPM1.KRAS 0.325 3.076 0.227 0.466
## GeneGene.NPM1.MYC 0.565 1.771 0.352 0.907
## GeneGene.NRAS.DNMT3A 1.390 0.719 1.150 1.681
## GeneGene.NRAS.EZH2 1.407 0.711 0.896 2.209
## GeneGene.PTPN11.DNMT3A 1.698 0.589 1.278 2.256
## GeneGene.PTPN11.NPM1 0.512 1.953 0.385 0.681
## GeneGene.PTPN11.NRAS 0.710 1.409 0.540 0.933
## GeneGene.RAD21.PTPN11 0.479 2.087 0.269 0.852
## GeneGene.RUNX1.EZH2 1.597 0.626 1.111 2.294
## GeneGene.RUNX1.IDH2 0.653 1.531 0.455 0.937
## GeneGene.RUNX1.NRAS 1.688 0.592 1.280 2.226
## GeneGene.SFRS2.ASXL1 0.466 2.147 0.314 0.691
## GeneGene.SFRS2.IDH1 0.522 1.914 0.337 0.810
## GeneGene.TET2.DNMT3A 1.470 0.680 1.184 1.825
## GeneGene.TET2.KRAS 2.257 0.443 1.502 3.393
## GeneGene.TET2.NRAS 0.525 1.904 0.404 0.683
## GeneGene.TET2.SFRS2 0.732 1.365 0.529 1.015
## GeneGene.TET2.STAG2 0.650 1.537 0.416 1.017
## GeneGene.TP53.NRAS 0.449 2.226 0.273 0.738
## GeneGene.FLT3_ITD.CBL 0.221 4.525 0.105 0.464
## GeneGene.FLT3_ITD.CEBPA 1.583 0.632 1.221 2.053
## GeneGene.FLT3_ITD.PTPN11 0.575 1.738 0.397 0.833
## GeneGene.FLT3_TKD.IDH2 0.337 2.969 0.175 0.649
## GeneGene.FLT3_TKD.RUNX1 1.972 0.507 1.382 2.812
## GeneGene.FLT3_TKD.WT1 1.685 0.593 1.162 2.445
## GeneCyto.CEBPA.minus9q 1.449 0.690 1.014 2.069
## GeneCyto.CEBPA.plus21 1.279 0.782 0.808 2.025
## GeneCyto.DNMT3A.MLL_PTD 1.416 0.706 1.028 1.949
## GeneCyto.DNMT3A.minus5_5q 0.393 2.541 0.250 0.619
## GeneCyto.EZH2.plus8_8q 1.163 0.860 0.777 1.741
## GeneCyto.IDH2.plus8_8q 1.558 0.642 1.089 2.229
## GeneCyto.KIT.t_8_21 0.708 1.413 0.497 1.008
## GeneCyto.KIT.inv16_t16_16 1.299 0.770 0.967 1.746
## GeneCyto.KRAS.minus7_7q 0.638 1.567 0.425 0.959
## GeneCyto.NPM1.plus8_8q 1.874 0.534 1.470 2.390
## GeneCyto.NRAS.MLL_PTD 0.582 1.718 0.409 0.828
## GeneCyto.NRAS.minus7_7q 0.593 1.687 0.434 0.810
## GeneCyto.NRAS.plus8_8q 1.969 0.508 1.508 2.571
## GeneCyto.NRAS.plus22 0.381 2.625 0.188 0.771
## GeneCyto.NRAS.minusY 0.526 1.901 0.355 0.780
## GeneCyto.NRAS.inv16_t16_16 1.156 0.865 0.951 1.404
## GeneCyto.RUNX1.MLL_PTD 0.597 1.676 0.447 0.797
## GeneCyto.RUNX1.minus7_7q 0.532 1.880 0.376 0.752
## GeneCyto.RUNX1.plus13 0.255 3.922 0.132 0.494
## GeneCyto.TP53.minus5_5q 0.555 1.801 0.341 0.904
## GeneCyto.TP53.mono17_17p_abn17p 0.756 1.323 0.525 1.088
## GeneCyto.TP53.mono4_4q_abn4q 0.546 1.831 0.311 0.960
## GeneCyto.U2AF1.minus20_20q 1.990 0.502 1.210 3.274
## GeneCyto.WT1.plus8_8q 0.652 1.534 0.471 0.902
## GeneCyto.FLT3_ITD.MLL_PTD 1.536 0.651 1.160 2.033
## GeneCyto.FLT3_ITD.minus9q 0.419 2.386 0.276 0.637
## GeneCyto.FLT3_ITD.t_15_17 1.287 0.777 0.968 1.712
## GeneCyto.FLT3_TKD.minus7_7q 0.363 2.757 0.220 0.598
## GeneTreat.ASXL1.tr0704 0.613 1.631 0.473 0.795
## GeneTreat.CBL.tr0704 1.779 0.562 1.386 2.282
## GeneTreat.CEBPA.ATRA 2.161 0.463 1.756 2.660
## GeneTreat.CEBPA.VPA 1.791 0.558 1.335 2.403
## GeneTreat.CEBPA.TPL 0.811 1.233 0.615 1.069
## GeneTreat.DNMT3A.tr0704 1.382 0.723 1.188 1.608
## GeneTreat.IDH1.TPL 0.709 1.411 0.501 1.004
## GeneTreat.IDH2.tr0704 0.801 1.249 0.614 1.044
## GeneTreat.IDH2.ATRA 0.700 1.429 0.528 0.928
## GeneTreat.KIT.tr0704 0.708 1.413 0.552 0.907
## GeneTreat.MLL2.tr0704 0.574 1.742 0.397 0.830
## GeneTreat.MYC.tr0704 1.039 0.962 0.681 1.585
## GeneTreat.NPM1.VPA 0.766 1.305 0.609 0.964
## GeneTreat.NRAS.tr0704 1.620 0.617 1.398 1.878
## GeneTreat.NRAS.TPL 0.470 2.128 0.374 0.590
## GeneTreat.PTPN11.tr0704 1.667 0.600 1.354 2.051
## GeneTreat.PTPN11.ATRA 1.928 0.519 1.529 2.431
## GeneTreat.PTPN11.TPL 1.489 0.671 1.151 1.928
## GeneTreat.RUNX1.ATRA 1.445 0.692 1.133 1.842
## GeneTreat.RUNX1.VPA 0.531 1.883 0.374 0.753
## GeneTreat.RUNX1.TPL 0.862 1.160 0.668 1.112
## GeneTreat.SFRS2.tr0704 1.127 0.887 0.913 1.392
## GeneTreat.SFRS2.VPA 2.740 0.365 1.688 4.450
## GeneTreat.TET2.tr98B 0.857 1.168 0.614 1.194
## GeneTreat.TET2.ATRA 1.129 0.886 0.905 1.408
## GeneTreat.TET2.TPL 0.496 2.018 0.386 0.637
## GeneTreat.TP53.ATRA 0.761 1.313 0.480 1.208
## GeneTreat.TP53.TPL 0.380 2.634 0.209 0.689
## GeneTreat.FLT3_ITD.tr98B 0.440 2.272 0.300 0.645
## GeneTreat.FLT3_ITD.tr0704 0.600 1.666 0.497 0.725
## GeneTreat.FLT3_ITD.ATRA 0.576 1.737 0.472 0.701
## GeneTreat.FLT3_ITD.VPA 1.241 0.806 0.936 1.645
## GeneTreat.FLT3_TKD.ATRA 0.485 2.063 0.360 0.653
## CytoTreat.MLL_PTD.tr0704 1.773 0.564 1.321 2.379
## CytoTreat.MLL_PTD.ATRA 1.118 0.894 0.834 1.501
## CytoTreat.t_MLL.TPL 1.787 0.559 1.266 2.524
## CytoTreat.inv3_t3_3.tr0704 0.287 3.479 0.146 0.565
## CytoTreat.minus5_5q.ATRA 0.655 1.528 0.406 1.056
## CytoTreat.minus7_7q.tr0704 1.534 0.652 1.287 1.829
## CytoTreat.plus8_8q.tr98B 2.209 0.453 1.547 3.152
## CytoTreat.plus8_8q.tr0704 2.028 0.493 1.617 2.544
## CytoTreat.plus8_8q.ATRA 0.723 1.383 0.569 0.918
## CytoTreat.plus8_8q.TPL 1.574 0.635 1.267 1.955
## CytoTreat.minus9q.tr0704 1.367 0.732 0.971 1.923
## CytoTreat.minus9q.ATRA 1.442 0.693 1.030 2.019
## CytoTreat.mono12_12p_abn12p.tr0704 2.046 0.489 1.660 2.521
## CytoTreat.plus21.tr0704 1.585 0.631 1.082 2.321
## CytoTreat.plus21.TPL 1.549 0.646 1.055 2.274
## CytoTreat.minusY.tr0704 1.148 0.871 0.820 1.607
## CytoTreat.minusY.ATRA 1.545 0.647 1.065 2.241
## CytoTreat.abn3q_other.tr0704 1.189 0.841 0.863 1.639
## CytoTreat.plus11_11q.ATRA 0.551 1.815 0.397 0.765
## CytoTreat.mono4_4q_abn4q.tr0704 1.006 0.994 0.636 1.592
##
## Concordance= 0.749 (se = 0.005 )
## Rsquare= 0.436 (max possible= 1 )
## Likelihood ratio test= 3834 on 156 df, p=0
## Wald test = 3573 on 156 df, p=0
## Score (logrank) test = 3932 on 156 df, p=0
X <- cbind(dataList$Genetics, dataList$Cytogenetics, dataList$Treatment, dataList$Clinical)
scope <- sub("(GeneGene|GeneTreat|CytoTreat|GeneCyto).", "", colnames(dataFrame)[grep("(GeneGene|GeneTreat|CytoTreat|GeneCyto)",
names(dataFrame))])
pInt <- testInteractions(X, survival, scope)
## Error: undefined columns selected
simPInt <- testInteractions(X, simSurvival, scope)
## Error: undefined columns selected
simDataPInt <- testInteractions(simDataFrame[, groups %in% c("Clinical", "Genetics", "Treatment", "Cytogenetics")], simDataSurvival,
scope)
## Error: undefined columns selected
simDataPInt <- testInteractions(simDataFrame[1:1000, groups %in% c("Clinical", "Genetics", "Treatment", "Cytogenetics")],
simDataSurvival[1:1000], scope)
## Error: undefined columns selected
set.seed(42)
v <- coxRFXFit$sigma2 * coxRFXFit$df[-8]/as.numeric(table(groups[whichRFXTD]) - 1)
# v['GeneGene'] <- v['GeneTreat'] <- v['CytoTreat'] <- v['GeneCyto'] <- 0.1
mainIdx <- groups %in% c("Clinical", "Genetics", "Treatment", "Cytogenetics")
pairs <- getPairs(names(simDataFrame)[mainIdx], scope)
# simulations <- list() for(size in c(100,1000, 10000)){ simulations[[as.character(size)]] <- list() for(i in 1:3){
# cat('.')
simCoef <- numeric(ncol(simDataFrame))
names(simCoef) <- colnames(simDataFrame)
w <- c(which(mainIdx), sample(which(!mainIdx), 500))
simCoef[w] <- rnorm(length(groups[w]), 0, sqrt(v[groups[w]]))
s <- sample(10000, size)
## Error: object 'size' not found
simDataRisk <- (as.matrix(simDataFrame[s, ]) %*% simCoef)
simDataRisk <- simDataRisk - mean(simDataRisk)
simDataSurvival <- simSurvNonp(simDataRisk, basehaz(coxRFXFit, centered = FALSE), survival)
simDataPInt <- testInteractions(simDataFrame[s, mainIdx], simDataSurvival, pairs)
# simulations[[as.character(size)]][[i]] <- list(simCoef=simCoef, simDataRisk=simDataRisk,
# simDataSurvival=simDataSurvival, simDataPInt=simDataPInt, s=s, w=w) } }
plot(simCoef[!mainIdx], simDataPInt$pWald, log = "y", col = ifelse(simCoef[!mainIdx] == 0, 2, 1))
idx <- simCoef[!mainIdx] == 0
qqplot(simDataPInt$pWald[idx], simDataPInt$pWald[!idx], log = "xy", xlab = "Coef == 0", ylab = "Coef != 0")
abline(0, 1)
title("QQ-plot")
which.min(simDataPInt$pWald)
## [1] 1273
simDataPInt[41, ]
## pWald pLR coef
## Genetics.GATA2:Genetics.EZH2 0.008738 0.001116 -1.338
plot(survfit(simDataSurvival ~ Genetics.GATA2 + Genetics.EZH2, data = simDataFrame[s, ]), col = set1)
plot(survfit(simDataSurvival ~ Genetics.GATA2 + Genetics.TP53, data = simDataFrame[s, ]), col = set1)
plot(simCoef[!mainIdx], simDataPInt$pLR, log = "y", col = ifelse(simCoef[!mainIdx] == 0, 2, 1))
idx <- simCoef[!mainIdx] == 0
qqplot(simDataPInt$pLR[idx], simDataPInt$pLR[!idx], log = "xy", xlab = "Coef == 0", ylab = "Coef != 0")
abline(0, 1)
title("QQ-plot")
meanRisk <- sapply(pairs, function(p) mean(simDataRisk[simDataFrame[s, p[1]] * simDataFrame[s, p[2]] == 1]) - simCoef[p[1]] -
simCoef[p[2]])
plot(simCoef[!mainIdx], simDataPInt$coef, xlab = "True coefficient", ylab = "Est. coefficient")
legend("topleft", bty = "n", paste("rho =", round(cor(simCoef[!mainIdx][simCoef[!mainIdx] != 0], simDataPInt$coef[simCoef[!mainIdx] !=
0], use = "complete", method = "spearman"), 2)))
plot(meanRisk, simDataPInt$coef, xlab = "Residual risk", ylab = "Est. coefficient")
legend("topleft", bty = "n", paste("rho =", round(cor(meanRisk, simDataPInt$coef, use = "complete", method = "spearman"),
2)))
load("temp.RData")
plot(simCoef[!mainIdx], simDataPIntAll$coef, xlab = "True coefficient", ylab = "Est. coefficient")
legend("topleft", bty = "n", paste("rho =", round(cor(simCoef[!mainIdx][simCoef[!mainIdx] != 0], simDataPIntAll$coef[simCoef[!mainIdx] !=
0], use = "complete", method = "spearman"), 2)))
plot(meanRisk, simDataPIntAll$coef, xlab = "Residual risk", ylab = "Est. coefficient")
legend("topleft", bty = "n", paste("rho =", round(cor(meanRisk, simDataPIntAll$coef, use = "complete", method = "spearman"),
2)))
plot(simCoef[!mainIdx], p.adjust(simDataPInt$pWald, "BH"), log = "y", main = "Marginal only", col = ifelse(simCoef[!mainIdx] ==
0, 2, 1))
abline(h = 0.1)
plot(simCoef[!mainIdx], p.adjust(simDataPIntAll$pWald, "BH"), log = "y", main = "Incl. all mains", col = ifelse(simCoef[!mainIdx] ==
0, 2, 1))
abline(h = 0.1)
#
#
# simResults <- list() for(size in c(100,1000,10000)){ method <- 'BH' simResults[[as.character(size)]]$tpr.bh <-
# sapply(simulations[[as.character(size)]], function(sim){ c <- cut(colSums(simDataFrame[sim$s,!mainIdx]), breaks =
# c(0,1,10,50,100,500,1000, 5000), include.lowest=TRUE) idx <- sim$simCoef[!mainIdx] != 0
# sapply(split((p.adjust(sim$simDataPInt$pLR,method) < 0.1)[idx], c[idx]), mean) })
# simResults[[as.character(size)]]$fpr.bh <- sapply(simulations[[as.character(size)]], function(sim){ c <-
# cut(colSums(simDataFrame[sim$s,!mainIdx]), breaks = c(0,1,10,50,100,500,1000, 5000), include.lowest=TRUE) idx <-
# sim$simCoef[!mainIdx] == 0 sapply(split((p.adjust(sim$simDataPInt$pLR,method) < 0.1)[idx], c[idx]), mean) }) method
# <- 'bonf' simResults[[as.character(size)]]$tpr.bonf <- sapply(simulations[[as.character(size)]], function(sim){ c <-
# cut(colSums(simDataFrame[sim$s,!mainIdx]), breaks = c(0,1,10,50,100,500,1000, 5000), include.lowest=TRUE) idx <-
# sim$simCoef[!mainIdx] != 0 sapply(split((p.adjust(sim$simDataPInt$pLR,method) < 0.1)[idx], c[idx]), mean) })
# simResults[[as.character(size)]]$fpr.bonf <- sapply(simulations[[as.character(size)]], function(sim){ c <-
# cut(colSums(simDataFrame[sim$s,!mainIdx]), breaks = c(0,1,10,50,100,500,1000, 5000), include.lowest=TRUE) idx <-
# sim$simCoef[!mainIdx] == 0 sapply(split((p.adjust(sim$simDataPInt$pLR,method) < 0.1)[idx], c[idx]), mean) }) }
#
# plot(sapply(split(p.adjust(simDataPInt$pWald,'BH') < 0.1 & simCoef[-(1:ncol(X))]!=0, c), mean), col='red',
# ylab='TPR', type='l', lty=3, xaxt='n', xlab='') axis(side=1, levels(c), at=1:nlevels(c))
# lines(sapply(split(p.adjust(simDataPInt$pWald,'BH') < 0.1 & simCoef[-(1:ncol(X))]==0, c), mean), lty=3)
# lines(sapply(split(p.adjust(simDataPInt$pWald) < 0.1 & c), mean), col='red', ylab='TPR', type='l')
# lines(sapply(split(p.adjust(simDataPInt$pWald) < 0.1 & simCoef[-(1:ncol(X))]==0, c), mean))
simulate <- function(X, coxRFXFit, whichGene, treatEffect = -0.5, geneTreatEffect = 0.5, nSim = 1) {
treatment <- rbinom(nrow(X), 1, 0.5) ## assume randomize treatment
Y <- cbind(X, Test = treatment)
risk <- as.matrix(X) %*% coxRFXFit$coef + treatment * treatEffect + X[, whichGene] * treatment * geneTreatEffect
simSurvival <- simSurvNonp(risk, basehaz(coxRFXFit, centered = FALSE), coxRFXFit$surv)
testInteractions(Y, simSurvival, list(c("Test", whichGene)), includeAllMain = FALSE)
}
set.seed(42)
effectSizes <- seq(-1, 1, 0.1)
simASXL1 <- lapply(c(1000, 10000), function(nSim) sapply(effectSizes, function(e) sapply(1:10, function(i) simulate(simDataFrame[1:nSim,
whichRFX], coxRFXFit, "Genetics.ASXL1", geneTreatEffect = e)[1, 1])))
simNPM1 <- lapply(c(1000, 10000), function(nSim) sapply(effectSizes, function(e) sapply(1:10, function(i) simulate(simDataFrame[1:nSim,
whichRFX], coxRFXFit, "Genetics.NPM1", geneTreatEffect = e)[1, 1])))
par(mfrow = c(1, 2))
for (x in c("simASXL1", "simNPM1")) {
tmp <- get(x)
boxplot(tmp[[1]], log = "y", names = effectSizes, border = "grey", ylim = range(tmp[[2]]))
boxplot(tmp[[2]], log = "y", names = effectSizes, add = TRUE, col = NA)
abline(h = 0.05)
abline(h = 0.05/length(pairs))
title(xlab = "Interaction effect size", ylab = "P-value", main = x)
}
load("/Volumes/mg14/subclones/snps.RData")
getSNPs <- function(samples) {
isCoding = function(subs) {
var = sub("(.*?)?(p\\.|$)", "", info(subs)$VW, perl = TRUE)
var[is.na(var)] = ""
x = sub("(^.)(.+)", "\\1", var)
y = sub("(.+)(.$)", "\\2", var)
return(x != y & x != "")
}
d <- dir("/Volumes/nst_links/live/845/", pattern = "WGA*")
r <- t(sapply(samples, function(s) {
i <<- i + 1
cat(ifelse(i%%100 == 0, "\n", "."))
stem <<- grep(s, d, value = TRUE)[1]
if (is.na(stem))
return(rep(NA, ncol(oncogenics)))
vcf <- readVcf(paste("/Volumes/nst_links/live/845/", stem, "/", stem, ".cave.annot.vcf.gz", sep = ""), "GRCh37")
# genes <- sub('\\|.+','',info(vcf)$VD[info(vcf)$SNP & isCoding(vcf)])
genes <- sub("\\|.+", "", info(vcf)$VD[vcf %over% ensemblVariation & isCoding(vcf)])
colnames(oncogenics) %in% genes
}))
colnames(r) <- colnames(oncogenics)
r
}
snp <- getSNPs(rownames(oncogenics))
save(snp, file = "snp.RData")
dataFrameTD <- dir("/Volumes/nst_links/live/845/", pattern = "WGA*")
allCaveOut <- sapply(rownames(oncogenics), function(s) {
i <<- i + 1
cat(ifelse(i%%100 == 0, "\n", "."))
stem <<- grep(s, dataFrameTD, value = TRUE)[1]
if (is.na(stem))
return(NA)
readVcf(paste("/Volumes/nst_links/live/845/", stem, "/", stem, ".cave.annot.vcf.gz", sep = ""), "GRCh37")
})
save(allCaveOut, file = "allCaveOut.RData")
v <- snps[!is.na(snps$MF)]
s <- matrix(0, ncol = ncol(oncogenics), nrow = nrow(oncogenics), dimnames = dimnames(oncogenics))
i <- 1
for (vcf in allCaveOut) {
if (is.na(vcf))
next
genes <- sub("\\|.+", "", info(vcf)$VD[vcf %over% v & isCoding(vcf)])
s[i, colnames(s) %in% genes] <- 1
i <- i + 1
}